def ttests(self, redundant_full=False):
    """Run independent t-tests between every pair of groups in ``self.data``.

    Each unordered pair is tested once (the group that comes first in
    iteration order is the outer key) for the alternatives 'two-sided',
    'less' and 'greater'.

    :param redundant_full: when true, also store the mirrored entry
        ``result[name2][name]`` with the 'less' and 'greater' p-values swapped.
    :return: nested dict ``{name: {name2: {alternative: p_value}}}``.
    """
    from scipy import stats  # scipy doesnt auto import subpackages

    alternatives = ['two-sided', 'less', 'greater']
    groups = listitems(self.data)
    results = {}
    for first_idx, (name, mat) in enumerate(groups):
        for second_idx, (name2, mat2) in enumerate(groups):
            if second_idx <= first_idx:
                # Skip self-comparisons and pairs already seen in the other order.
                continue
            pvalues = {}
            for alt in alternatives:
                pvalues[alt] = stats.ttest_ind(mat, mat2, alternative=alt)[1]
            results.setdefault(name, {})[name2] = deepcopy(pvalues)
            if redundant_full:
                # In the mirrored direction the one-sided alternatives trade places.
                pvalues['less'], pvalues['greater'] = pvalues['greater'], pvalues['less']
                results.setdefault(name2, {})[name] = pvalues
    return results
def acc_table(self, data):
    """Render the accuracy report as a ``Div`` of headings and text tables.

    For each backend listed in the local ``titles`` map, one table is
    produced with a row per architecture and a column per preprocessing key;
    each cell shows ``acc`` and ``acc5`` as truncated integer percentages on
    two lines. A fixed table of literature results and its citations follow.

    :param data: nested mapping ``data[backend][arch][pp]`` whose leaves
        provide ``'acc'`` and ``'acc5'``; ``'y_true'`` keys are skipped.
    :return: ``Div`` wrapping all generated elements.
    """
    titles = {
        'tf': f'Tensorflow ({100 if SANITY_SET == SanitySet.Set100 else SANITY_SET.num})',
        'ml': 'MATLAB (100)'
    }

    def render(rows):
        # Every column is centered horizontally and vertically.
        width = len(rows[0])
        return HTML_Pre(str(TextTableWrapper(
            data=rows,
            col_align='c' * width,
            col_valign='m' * width
        )))

    report = []
    for be_key in listkeys(titles):
        be_data = data[be_key]
        if be_key in ['files', 'dest']:
            continue
        arch_rows = []
        for akey, adata in listitems(be_data):
            if akey == 'y_true':
                continue
            # The header is rebuilt per architecture; the last one is used.
            top_row = ['Arch']
            row = [akey]
            for ppkey, ppdata in listitems(adata):
                if ppkey == 'y_true':
                    continue
                top_row.append(ppkey)
                top1 = int(ppdata['acc'] * 100)
                top5 = int(ppdata['acc5'] * 100)
                row.append(str(top1) + '\n' + str(top5))
            arch_rows.append(row)
        table = [top_row] + arch_rows
        report.append(H3(titles[be_key]))
        report.append(render(table))
        if be_key == 'ml2tf':
            report.append('* Darius has uploaded new models that have not yet been tested')

    report.append(H3('ImageNet Results from Literature'))
    literature = [
        ['Arch', 'lit'],
        ['ALEX', f'?\n{int(0.847 * 100)}'],
        ['GNET', f'?\n{int(0.99333 * 100)}'],
        ['INC', f'80.4\n95.3']
    ]
    report.append(render(literature))
    report.append(HTML_Pre(''' Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet classification with deep convolutional neural networks." Advances in neural information processing systems. 2012. Szegedy, Christian, et al. "Going deeper with convolutions." Proceedings of the IEEE conference on computer vision and pattern recognition. 2015. Improving Inception and Image Classification in TensorFlow.” Google AI Blog, 31 Aug. 2016, ai.googleblog.com/2016/08/improving-inception-and-image.html. '''))
    return Div(*report)
def compile_eg(self, eg: DNN_ExperimentGroup):
    """Merge the per-experiment ``sanity.pickle`` files of a group into one dict.

    The result is keyed by backend, then by architecture. Only the first
    experiment seen for each architecture contributes. ``'dest'`` holds the
    absolute path of the compiled pickle and ``'files'`` is replaced by the
    file list stored for the last experiment iterated.

    :param eg: experiment group to compile.
    :return: the merged dict.
    """
    pname = 'sanity.pickle'
    # Any experiment will do for discovering the top-level (backend) keys.
    sample_exp = experiments_from_folder(eg.folder)[0]
    backend_keys = listkeys(sample_exp.folder[f'sanity/{pname}'].load())
    data = {}
    for key in backend_keys:
        data[key] = {}
    data['dest'] = eg.compile_exp_res_folder[pname].abspath

    seen_archs = []
    for exp in eg.experiments:
        if exp.arch in seen_archs:
            continue
        loaded = exp.folder['sanity'][pname].load()
        for backendkey, bedata in listitems(loaded):
            data[backendkey][exp.arch] = bedata
            if 'y_true' in bedata:
                data[backendkey]['y_true'] = bedata['y_true']
        seen_archs.append(exp.arch)

    # Collapse the per-arch file lists to the one of the last experiment.
    data['files'] = data['files'][exp.arch]
    return data
def fixInfs(self):
    """Replace the strings 'inf' / '-inf' stored on this object via ``parse_inf``.

    Attributes that are themselves ``obj`` instances are processed
    recursively. Returns ``self`` so calls can be chained.
    """
    for attr_name, attr_value in listitems(self.__dict__):
        if attr_value in ('inf', '-inf'):
            from mlib.math import safemean, parse_inf
            self.__dict__[attr_name] = parse_inf(attr_value)
            continue
        if isinstance(attr_value, obj):
            attr_value.fixInfs()
    return self
def count():
    """Count both dataset splits and write the merged counts as JSON.

    The output file ``_data/imagenet_count.json`` maps every key returned
    for the train split to ``{'train': ..., 'validation': ...}``.
    """
    log('count here 1')
    train_counts = count_split("train")
    validation_counts = count_split("validation")

    merged = {key: {'train': n} for key, n in listitems(train_counts)}
    for key, n in listitems(validation_counts):
        # Keys are expected to exist already from the train split.
        merged[key]['validation'] = n

    serialized = json.dumps(merged, indent=2)
    log(f'data sample: {serialized[:20]}')
    Folder('_data').mkdir()
    File('_data/imagenet_count.json').write(serialized)
def build(self):
    """Draw a table of the two-sided t-test p-values for this builder.

    The nested t-test result is flattened with ``scrunch`` (separator '-'),
    the one-sided entries are removed, and the remaining rows are drawn as
    a ``TableData`` figure.
    """
    only_two_tailed = scrunch(self.ttest_result, '-')
    for key, _ in listitems(only_two_tailed):
        for one_sided in ('less', 'greater'):
            del only_two_tailed[key][one_sided]

    rows = dict_to_table(only_two_tailed)[1:]  # remove ['','two-sided']
    title = f'{self.net}: T-Test P-values Between Groupings of {method_strings[self.method_name]} Results, Grouped by {pattern_strings[self.pat]}'
    table = TableData(data=rows, title=title, fontsize=40.0)
    table.draw(builder=self, tags=self.tags + ['table', 'PValueTable', self.pat])
def preview_dict_recurse(d, depth=1):
    """Build a preview of ``d`` in which leaf values are replaced by type-name tags.

    Values are rendered as ``PDT + cn(value) + PDT``; nested dicts are
    expanded for up to ``depth`` levels, after which a dict is itself
    rendered as a single tag.

    :param d: mapping to preview.
    :param depth: number of dict levels to expand.
    :return: a dict preview, or a tag string once ``depth`` is exhausted.
    """
    collapsed = PDT + cn({}) + PDT
    if not depth > 0:
        return collapsed
    return {
        key: preview_dict_recurse(value, depth - 1) if isdict(value) else PDT + cn(value) + PDT
        for key, value in listitems(d)
    }
def __init__(self, *args, **kwargs):
    """Collect child objects, optionally assigning ids through ``identified``.

    :param args: child objects, kept in order.
    :param kwargs: forwarded to the parent initializer, except for the
        optional ``identified`` mapping ``{id: object}``: each object gets
        ``attributes['id']`` set to its key and is appended after ``args``.
    """
    children = list(args)
    if 'identified' in kwargs:
        # Consume the keyword so the parent initializer never sees it.
        identified = kwargs.pop('identified')
        for element_id, element in listitems(identified):
            element.attributes['id'] = element_id
            children.append(element)
    super().__init__(**kwargs)
    self.objs = children
def confuse_analysis(self, data, lamb, identiy=True):
    """Compute a labelled confusion matrix across all backend/arch/pp results.

    :param data: nested mapping ``data[backend][arch][pp]`` whose leaves
        provide ``'y_pred'`` and ``'acts'``; the top-level keys 'files',
        'dest' and 'y_true' are ignored.
    :param lamb: forwarded to ``self.iconfuse``.
    :param identiy: forwarded to ``self.iconfuse`` under the same name.
    :return: list of rows; the first row is ``[None, *labels]`` and every
        following row starts with its own label.
    """

    @dataclass
    class IN_Result:
        backend: str
        arch: str
        pp: str
        y_pred: np.ndarray
        acts: np.ndarray

        def __str__(self):
            # Label is the concatenation of the three short codes.
            return ''.join([
                f'{SanityAnalysis.bedict[self.backend]}',
                f'{SanityAnalysis.adict[self.arch]}',
                f'{SanityAnalysis.ppdict[self.pp]}',
            ])

    in_results = [
        IN_Result(
            backend=bekey,
            arch=akey,
            pp=ppkey,
            y_pred=arr(ppdata['y_pred']),
            acts=arr(ppdata['acts'])
        )
        for bekey, bedata in listitems(data)
        if bekey not in ['files', 'dest', 'y_true']
        for akey, arch_data in listitems(bedata)
        for ppkey, ppdata in listitems(arch_data)
    ]

    matrix = self.iconfuse(in_results, lamb, identiy=identiy)
    labels = listmap(str, in_results)
    header = [None] + labels
    body = [[labels[i]] + row for i, row in enum(matrix.tolist())]
    return [header] + body
def __init__(self, RESOURCES_ROOT: Folder, _DEV: bool = None):
    """Set up the folders and version info for this experiment site.

    :param RESOURCES_ROOT: resources root; wrapped in ``Folder``.
    :param _DEV: must be passed explicitly (asserted to be not ``None``).
    """
    assert _DEV is not None
    self._DEV = _DEV
    self.RESOURCES_ROOT = Folder(RESOURCES_ROOT)

    # Everything else lives next to the file that defines the concrete class.
    exp_folder = File(inspect.getfile(self.__class__)).parent
    self.EXP_FOLDER = exp_folder
    self.FIG_FOLDER = Folder(exp_folder['figs'])

    changelist = exp_folder['changelist.yml']
    self.changelist = changelist
    self.VERSIONS = changelist
    # The last entry of the loaded changelist is taken as the current version.
    self.THIS_VERSION = listitems(changelist.load())[-1]
    self.ROOT = exp_folder['build/site']
def take_om_logs(OMP):
    """Download the remote pipeline-section file and merge it into the manager.

    The file is fetched through ``OMP.get`` into ``_data``; every section it
    contains is then registered in ``manager.PIPELINE_SECTIONS``. Labels that
    already exist are renamed with ``next_int_suffix`` until they are unique.

    :param OMP: object providing ``get(path, destination=..., overwrite=...)``.
    """
    manager = get_manager()
    from mlib.boot.lang import pwd
    from mlib.boot.stream import listitems
    from mlib.file import File, Folder

    with PipelineSection('downloading pipeline section data', log=True):
        remote_rel_path = File(PIPELINE_SECTION_FILE).rel_to(pwd())
        OMP.get(remote_rel_path, destination=Folder('_data'), overwrite=True)

    downloaded_sections = File(PIPELINE_SECTION_FILE).load(silent=True)
    for seclabel, secdata in listitems(downloaded_sections):
        unique_label = seclabel
        while unique_label in manager.PIPELINE_SECTIONS:
            unique_label = next_int_suffix(unique_label)
        manager.PIPELINE_SECTIONS[unique_label] = secdata
def calc_accs(self, data):
    """Attach predictions and top-1 / top-5 accuracy to every activation set.

    Ground-truth labels for 'ml' are parsed from the integer prefix of each
    file name (text before the first '_'). For 'tf' the same labels are used
    when ``SANITY_SET`` is ``Set100``; otherwise the stored tf labels are
    shifted down by one. Each ``data[backend][arch][pp]`` activation matrix
    is then replaced in place by a dict with 'acts', 'y_pred', 'acc', 'acc5'.

    :param data: compiled sanity data; mutated in place.
    :return: the same ``data`` object.
    """
    file_labels = [int(fname.split('_')[0]) for fname in data['files']]
    data['ml']['y_true'] = file_labels
    if SANITY_SET == SanitySet.Set100:
        data['tf']['y_true'] = file_labels
    else:
        data['tf']['y_true'] = (arr(data['tf']['y_true']) - 1).tolist()

    for bekey, bedata in listitems(data):
        if bekey in ['files', 'dest']:
            continue
        for akey, arch_data in listitems(bedata):
            if akey == 'y_true':
                continue
            for ppkey, acts in listitems(arch_data):
                if ppkey == 'y_true':
                    continue
                y_true = bedata['y_true']
                y_pred = [maxindex(acts[i]) for i in range(len(acts))]
                acc = 1 - error_rate_core(y_true, y_pred)
                # A sample is a top-5 hit when its label is among the 5 best indices.
                top5_hits = sum(
                    1 for i in range(len(y_pred))
                    if y_true[i] in maxindex(acts[i], num=5)
                )
                arch_data[ppkey] = {
                    'acts'  : acts,
                    'y_pred': y_pred,
                    'acc'   : acc,
                    'acc5'  : top5_hits / len(y_pred)
                }
    return data
def new_format(bibliography, id_use_order):
    """Render a bibliography as a ``Div`` of numbered entries with links.

    Entries are numbered from 1 in iteration order of ``bibliography``; each
    gets its own ``Div`` with the entry id as element id, the formatted
    entry text, and a 'link' hyperlink to ``entry['murl']``.

    :param bibliography: mapping of entry id to entry.
    :param id_use_order: accepted but not used by this formatter.
    :return: the containing ``Div``.
    """
    container = Div()
    for number, (entry_id, entry) in enumerate(listitems(bibliography), start=1):
        item = Div(id=entry_id)
        item += (str(number) + '. ' + _format_entry(entry))  # unconventional
        item += ' ('
        item += Hyperlink('link', entry['murl'], target='_blank')
        item += ')'
        container += item
        container += Br
    return container
def __new__(mcs, name, bases, attrs):
    """Create the class and turn each ``Abstract`` placeholder into a raising property.

    Every class attribute that is an ``Abstract`` instance is replaced by a
    property whose getter raises ``NotImplementedError``. If such a
    placeholder is found on a class that does not list ``ABC`` directly in
    ``bases``, ``err('bad')`` is called. Classes without ``ABC`` in
    ``bases`` finally get ``cls.__meta_post_init__()`` invoked.

    :param mcs: the metaclass.
    :param name: name of the class being created.
    :param bases: direct base classes.
    :param attrs: class namespace.
    :return: the newly created class.
    """
    cls = super().__new__(mcs, name, bases, attrs)

    def replacement(_instance):
        # property() calls its getter with the instance. The getter must
        # accept it, otherwise attribute access fails with TypeError rather
        # than the intended NotImplementedError.
        raise NotImplementedError

    # listitems() snapshots the class namespace, so setattr() below cannot
    # disturb the iteration.
    for k, v in listitems(cls.__dict__):
        if isinstance(v, Abstract):
            setattr(cls, k, property(replacement))
            if ABC not in bases:
                err('bad')
    if ABC not in bases:
        cls.__meta_post_init__()
    return cls
def examples(self):
    """Return one example image per class label found in the generator output.

    For each ``(class, label)`` pair of ``self.class_label_map`` the batches
    of ``self.gen()`` are scanned until the first item whose second
    component equals the label; its first component is recorded.

    :return: list of ``(class, image)`` tuples; empty when ``self.imds`` is
        empty. Classes with no matching item are left out.
    """
    found = []
    if len(self.imds) == 0:
        return found
    for c, lab in listitems(self.class_label_map):
        for batch_pair in self.gen():
            for img_pair in zip(*batch_pair):
                if img_pair[1] == lab:
                    found.append((c, img_pair[0]))
                    break
            else:
                # No match in this batch: keep scanning the next one.
                continue
            # Matched: stop scanning batches for this class.
            break
    return found
def pub_print_warn():
    """Log a summary of the warnings collected by ``mlib.boot.mlog``.

    Nothing is printed when the logger is quiet or the log level is below
    WARN. Otherwise the number of warnings is logged, followed by each
    distinct warning with its occurrence count, or a 'NO WARNINGS!' info.
    """
    from mlib.boot.mlog import warnings, log, LOG_LEVEL, LogLevel, info
    import mlib.boot.mlog

    if mlib.boot.mlog.QUIET or not LOG_LEVEL.value >= LogLevel.WARN.value:
        return

    log(f'{len(warnings)=}')
    if not len(warnings) > 0:
        info('NO WARNINGS!')
        return

    log('WARNINGS:')
    occurrences = {}
    for warning in warnings:
        occurrences[warning] = occurrences.get(warning, 0) + 1
    for k, v in listitems(occurrences):
        log(f'\t{k} ({v} occurrences)')
def nnet_main(FLAGS):
    """Prepare the image folders, load the datasets, build the net and train it.

    Steps, in order:
      1. Optionally regenerate images via ``gen_main`` (``FLAGS.gen``).
      2. Select train / test / RSA data directories: either random samples
         copied from the dogs-vs-cats folder (``FLAGS.salience``) or the
         per-GPU folder under ``_images``.
      3. Optionally delete normalization dirs (``FLAGS.deletenorms``).
      4. Load the three datasets, build the network, run the pipeline-mode
         analyses' ``after_build`` hooks and start training.

    :param FLAGS: run configuration object; ``FLAGS.epochs`` is coerced to int.
    :return: the result of ``trainTestRecord(net, '', FLAGS.epochs)``.
    """
    FLAGS.epochs = int(FLAGS.epochs)
    _IMAGES_FOLDER = pwdf()['_images'].mkdirs(mker=True)
    HUMAN_IMAGE_FOLDER = pwdf()['_images_human'].mkdirs(mker=True)
    if FLAGS.gen:
        gen_main(FLAGS, _IMAGES_FOLDER, HUMAN_IMAGE_FOLDER)
    if FLAGS.salience:
        class_map = {'dog': 0, 'cat': 1}
        dogcatfolder = DATA_FOLDER.resolve(
            'tf_bug1/dogscats')  # thousands, downloaded from kaggle
        ntrain_folder = dogcatfolder['ntrain']
        dummy_folder = dogcatfolder['dummy'].mkdir()
        # Start from an empty training folder, then copy in a random
        # sample of FLAGS.ntrain images per class.
        ntrain_folder.deleteIfExists().mkdir()
        for k, v in listitems(class_map):
            log('getting files')
            files = dogcatfolder['Training'][k].files.tolist()
            random.shuffle(files)
            log('looping files')
            for im in files[0:FLAGS.ntrain]:
                im.copyinto(ntrain_folder[k])
        # NTEST = 100
        # The test set is sized so that all classes together fill one batch.
        NTEST = int(FLAGS.batchsize / len(listitems(class_map)))
        ntest_folder = dogcatfolder['ntest']
        ntest_folder.deleteIfExists().mkdir()
        for k, v in listitems(class_map):
            log('getting files')
            files = dogcatfolder['Testing'][k].files.tolist()
            random.shuffle(files)
            log('looping files')
            for im in files[0:NTEST]:
                im.copyinto(ntest_folder[k])
        GPU_TRAIN_FOLDER = NN_Data_Dir(ntrain_folder.abspath)
        GPU_TEST_FOLDER = NN_Data_Dir(ntest_folder.abspath)
        # The RSA set points at the 'dummy' folder in salience mode.
        GPU_RSA_FOLDER = NN_Data_Dir(dummy_folder.abspath)
    else:
        GPU_IMAGES_FOLDER = _IMAGES_FOLDER[f'gpu{FLAGS.mygpufordata}']
        GPU_TRAIN_FOLDER = NN_Data_Dir(
            GPU_IMAGES_FOLDER[f'Training/{FLAGS.ntrain}'])
        GPU_TEST_FOLDER = NN_Data_Dir(GPU_IMAGES_FOLDER[f'Testing'])
        GPU_RSA_FOLDER = NN_Data_Dir(GPU_IMAGES_FOLDER[f'RSA'])
    if FLAGS.deletenorms:
        GPU_TRAIN_FOLDER.delete_norm_dir()
        GPU_TEST_FOLDER.delete_norm_dir()
        GPU_RSA_FOLDER.delete_norm_dir()
        # NOTE(review): presumably ends the run after cleanup — confirm.
        nn_init_fun.NRC_IS_FINISHED(
        )  # must be invoked this way since value of function changes
    if FLAGS.normtrainims:
        err('im doing this?')
    # The reduced class map is enabled when the train and test folders
    # contain a different number of entries.
    nnstate.use_reduced_map = len(GPU_TRAIN_FOLDER.files) != len(
        GPU_TEST_FOLDER.files)
    # TRAIN_TEST_SPLIT=1 keeps the first returned dataset, 0 the second.
    datasetTrain, _ = load_and_preprocess_ims(
        TRAIN_TEST_SPLIT=1,
        data_dir=GPU_TRAIN_FOLDER,
        normalize_single_images=FLAGS.normtrainims)
    _, datasetVal = load_and_preprocess_ims(
        TRAIN_TEST_SPLIT=0,
        data_dir=GPU_TEST_FOLDER,
        normalize_single_images=FLAGS.normtrainims)
    _, datasetTest = load_and_preprocess_ims(
        TRAIN_TEST_SPLIT=0,
        data_dir=GPU_RSA_FOLDER,
        normalize_single_images=FLAGS.normtrainims)
    if FLAGS.proto_model:
        net = PROTO()
    else:
        net = ARCH_MAP[FLAGS.arch](
            max_num_classes=len(listkeys(datasetTest.class_label_map)))
    net.build(FLAGS)
    # List comprehension used only for its side effects (analysis hooks).
    [a.after_build(FLAGS, net) for a in ANALYSES(mode=AnalysisMode.PIPELINE)]
    net.train_data = datasetTrain.prep(net.HEIGHT_WIDTH, net.PP)
    net.val_data = datasetVal.prep(net.HEIGHT_WIDTH, net.PP)
    net.test_data = datasetTest.prep(net.HEIGHT_WIDTH, net.PP)
    return trainTestRecord(net, '', FLAGS.epochs)
def after_build(self, FLAGS, tf_net: ModelWrapper):
    """Collect sanity-check activations right after the network was built.

    Runs only for pretrained nets when 'SANITY' is in ``FLAGS.pipeline``.
    For every preprocessor the TensorFlow model is run through
    ``simple_predict`` and stored under ``r['tf'][pp_name]``. The MATLAB
    activations are loaded from ``_data/sanity/<label>/...mat`` into
    ``r['ml']``, and the whole dict is saved via ``save_dnn_data``.

    When ``SANITY_SET`` is not ``Set100`` the inputs are streamed from the
    ImageNet validation TFRecords instead of the image files, and the true
    labels are recorded in ``r['tf']['y_true']`` as records are decoded.

    :param FLAGS: run configuration; only ``FLAGS.pipeline`` is read here.
    :param tf_net: the built model wrapper.
    """
    if tf_net.pretrained and 'SANITY' in FLAGS.pipeline:
        IN_files = tf_net.IMAGE_NET_FOLD['unknown'].files
        r = {
            'files': IN_files.map(__.name),
            'ml'   : {},
            'tf'   : {}
            # 'ml2tf': {}
        }
        for pp_name, pp in listitems(preprocessors(tf_net.hw)):
            if SANITY_SET != SanitySet.Set100:
                import tensorflow as tf
                # Bind the resolved folder: its result used to be discarded,
                # leaving ``root`` undefined for the glob below.
                root = DATA_FOLDER.resolve('ImageNet/output')
                filenames = root.glob('validation*').map(lambda f: f.abspath).tolist()
                r[f'tf']['y_true'] = [None] * SANITY_SET.num
                ds = tf.data.TFRecordDataset(filenames)
                image_feature_description = {
                    'image/height'      : tf.io.FixedLenFeature([], tf.int64),
                    'image/width'       : tf.io.FixedLenFeature([], tf.int64),
                    'image/colorspace'  : tf.io.FixedLenFeature([], tf.string),
                    'image/channels'    : tf.io.FixedLenFeature([], tf.int64),
                    'image/class/label' : tf.io.FixedLenFeature([], tf.int64),
                    'image/class/synset': tf.io.FixedLenFeature([], tf.string),
                    'image/class/text'  : tf.io.FixedLenFeature([], tf.string),
                    'image/format'      : tf.io.FixedLenFeature([], tf.string),
                    'image/filename'    : tf.io.FixedLenFeature([], tf.string),
                    'image/encoded'     : tf.io.FixedLenFeature([], tf.string),
                }
                # Decoded images that were produced but not yet consumed.
                imap = {}

                def input_gen():
                    # Decode records one by one, recording each true label
                    # as a side effect.
                    for i, raw_record in enum(ds):
                        example = tf.io.parse_single_example(raw_record, image_feature_description)
                        r[f'tf']['y_true'][i] = example['image/class/label'].numpy()
                        rrr = tf.image.decode_jpeg(example['image/encoded'], channels=3).numpy()
                        imap[i] = rrr
                        yield rrr

                igen = input_gen()

                def get_input(index):
                    # Random-access facade over the sequential generator:
                    # advance until ``index`` is decoded, then evict all
                    # earlier images to bound memory use.
                    if index not in imap:
                        next(igen)
                        return get_input(index)
                    else:
                        rr = imap[index]
                        for k in list(imap.keys()):
                            if k < index:
                                del imap[k]
                        return rr

                IN_files = get_input
            r[f'tf'][pp_name] = simple_predict(
                tf_net,  # ,ml2tf_net
                pp,
                IN_files,
                length=SANITY_SET.num,
                # length=50000
            )
        for pp_name in ['none', 'divstd_demean', 'unit_scale', 'demean_imagenet', 'DIIL']:
            r['ml'][pp_name] = Folder('_data/sanity')[tf_net.label][
                f'ImageNetActivations_Darius_{pp_name}.mat'
            ].load()['scoreList']
        save_dnn_data(
            data=r,
            domain='sanity',
            nam='sanity',
            ext='pickle'
        )
def _get_cfg(self):
    """Build the run configuration from the project config, argv and freecfg.json.

    Command-line arguments (``sys.argv[1:]``) come in three forms:
      * ``--key=value``: override of a single config entry ('tic' is ignored);
      * ``-prof=name`` / ``-cfg=name``: selects the profile / config section;
      * bare words: flags, which must be registered (everything is accepted
        once the 'cell' flag has been seen).

    Precedence, lowest to highest: profile, config, ``--`` overrides,
    entries of ``freecfg.json``.

    :return: ``obj`` wrapping the merged config dict (with a 'FLAGS' list).
    """
    # Registered flags must be unique.
    assert len(self.registered_flags()) == len(set(self.registered_flags()))
    freecfg = File('freecfg.json').load()
    prof = 'default'
    cfg = 'default'
    changes = {}
    flags = []
    cell = False
    for idx, a in enum(sys.argv):
        if idx == 0:
            # argv[0] is the program name
            continue
        elif a.startswith('--'):
            # NOTE(review): replace() strips every '--' in the argument, not
            # just the prefix, and a value containing '=' breaks the
            # 2-tuple unpacking — confirm this is acceptable.
            k, v = tuple(a.replace('--', '').split('='))
            if k == 'tic': continue
            changes[k] = v
        elif a.startswith('-'):
            # NOTE(review): replace() also removes dashes inside the value.
            k, v = tuple(a.replace('-', '').split('='))
            if k == 'prof':
                prof = v
            elif k == 'cfg':
                cfg = v
            else:
                err('arguments with one dash (-) need to be prof= or cfg=')
        elif cell or a in self.registered_flags():
            # After 'cell', the following words are accepted unvalidated.
            if a == 'cell':
                cell = True
            flags += [a]
        else:
            err(f'invalid argument:{a} please see README')
    # From here on, prof and cfg are the selected sections, not their names.
    prof = Project.CFG['profiles'][prof]
    cfg = Project.CFG['configs'][cfg]
    for k in listkeys(prof):
        if k in listkeys(cfg):
            prof_ntrain = prof[k]
            for i, n in enum(cfg[k]):
                # A string entry like 'i2' in the config is a placeholder
                # for index 2 of the profile's list under the same key
                # (only a single digit is read).
                if isstr(n) and n[0] == 'i':
                    cfg[k][i] = prof_ntrain[int(n[1])]
    cfg = {**prof, **cfg, 'FLAGS': flags}
    for k, v in listitems(changes):
        if k not in listkeys(cfg):
            err(f'invalid -- arguments: {k}, please see {Project.CFG.name} for configuration options')
        if isinstance(cfg[k], bool):
            # Booleans are given as 0/1 on the command line.
            v = bool(int(v))
        cfg[k] = v
    # hello from freecomp
    for k, v in listitems(freecfg):
        log(f'freecfg: {k}:{v}')
        cfg[k] = v
    # cfg['EPOCHS'] = freecfg['EPOCHS']
    return obj(cfg)
def copy_and_set(d, **kvs):
    """Return a deep copy of ``d`` with the given keyword items assigned.

    :param d: container supporting item assignment; left untouched.
    :param kvs: items to set on the copy.
    :return: the modified deep copy.
    """
    duplicate = deepcopy(d)
    for key, value in listitems(kvs):
        duplicate[key] = value
    return duplicate
def arg_tags(**kwargs):
    """Wrap keyword arguments in a hidden ``Div`` of paragraphs.

    Each keyword becomes an ``HTML_P`` whose text is ``str(value)`` and whose
    id is ``str(key)``; the enclosing ``Div`` has style ``display: none``.
    """
    paragraphs = []
    for key, value in listitems(kwargs):
        paragraphs.append(HTML_P(str(value), id=str(key)))
    return Div(*paragraphs, style={'display': 'none'})
def __init__(self, mode: AnalysisMode):
    """Store the mode and collect the cells defined directly on this class.

    :param mode: analysis mode for this instance.
    """
    self.mode = mode
    # Only the class's own namespace is scanned, not its base classes.
    self.cells = [
        member
        for _, member in listitems(self.__class__.__dict__)
        if iscell(member)
    ]
def _super_run(self, cfg_overrides=None):
    """Parse the configuration and dispatch to the action selected by the flags.

    Exactly one of these is executed inside the ``WOLFRAM`` context:
    'build' (only when ``self.mbuild``), 'readme', a clear-cache flag, a
    single 'cell', or - by default - ``self.run(cfg)``. Afterwards the
    daily reports and, on mac, dependency / docs maintenance are run.

    :param cfg_overrides: optional mapping of attribute name to value, set
        on the parsed cfg before anything runs.
    """
    if cfg_overrides is None:
        cfg_overrides = {}
    # Imported locally rather than at module level.
    from mlib.web.html import HTMLObject
    from mlib.web import shadow
    from mlib.proj.stat import py_deps, class_model_report
    self.prep_log_file(None)
    cfg = self._get_cfg()
    for k, v in listitems(cfg_overrides):
        setattr(cfg, k, v)
    self.cfg = cfg
    if ismac():
        # why was this so important again?
        # self.daily(
        #     self.write_reqs
        # )
        # and this?
        # self.daily(
        #     enable_py_call_graph,
        #     Project.PYCALL_FILE
        # )
        pass
    with WOLFRAM:
        if 'build' in cfg.FLAGS and self.mbuild:
            assert len(cfg.FLAGS) == 1
            # NOTE(review): if err() raises, the three calls below are
            # unreachable — confirm whether this branch is disabled on purpose.
            err('anything that depends on mlib has to push that too')
            build()
            write_README(self)
            self.push()
        elif 'readme' in cfg.FLAGS:
            assert len(cfg.FLAGS) == 1
            write_README(self)
        elif any(x in cfg.FLAGS for x in self.clear_clear_cache_flags):
            assert len(cfg.FLAGS) == 1
            clear_cell_cache()
        elif 'cell' in cfg.FLAGS:
            # Expected flags: ['cell', <analysis name>, <cell name>]
            assert len(cfg.FLAGS) == 3
            analysisFlag = cfg.FLAGS[1]
            cellName = cfg.FLAGS[2]
            # Match the analysis by class name or by the last component of
            # its module name.
            analysisO = arr(ANALYSES(AnalysisMode.CELL)).first(
                lambda o: cn(o) == analysisFlag or mn(o).split('.')[-1] == analysisFlag
            )
            cell = getattr(analysisO, cellName)
            if cell.inputs[0] is not None:
                inputs = cell.load_cached_input(analysisO)
                cell(*inputs)
            else:
                cell()
        else:
            if ismac():
                # need to have dailyOrFlag
                # its asking me to delete nap online. not sure if I can do this so I'm commenting this out for now.
                # self.daily(
                #     wolf_manager.manage
                # )
                run_in_daemon(
                    pingChecker)  # this line wasnt nested in ismac b4 but got an error in openmind last time I ran
            log('about to run with cfg')
            self.run(cfg)
    self.daily(
        class_model_report, HTMLObject
    )
    if ismac():
        self.daily(
            # atexit.register,
            py_deps,
            main_mod_file(),
            Project.PYDEPS_OUTPUT
        )
    # atexit.register(
    if ismac() and shadow.enabled:  # not doing this on openmind yet because it erases docs_local/results.html which I am using. need to fix this though
        shadow.build_docs()
    # )
    if ismac():
        reloadIdeaFilesFromDisk()
def r(self, d):
    """Apply every ``old -> new`` replacement in ``d`` to ``self``, in order.

    Each replacement works on the result of the previous one.

    :param d: mapping of substring to replacement.
    :return: the object after all ``replace`` calls.
    """
    result = self
    for old, new in listitems(d):
        result = result.replace(old, new)
    return result
def gen_main(FLAGS, _IMAGES_FOLDER, HUMAN_IMAGE_FOLDER):
    """(Re)generate the image folders used for training, testing and RSA.

    Both target folders are cleared first. In salience mode, cat and dog
    images are extracted from the ImageNet training TFRecords; otherwise
    synthetic images are produced with ``gen_images``. Finally the images
    folder is replicated once per GPU (``gpu1`` .. ``gpuN``) and
    ``nn_init_fun.NRC_IS_FINISHED()`` is called.

    :param FLAGS: run configuration (``cfg_cfg['gen_cfg']``, ``salience``,
        ``REGEN_NTRAIN`` are read).
    :param _IMAGES_FOLDER: destination folder for network images.
    :param HUMAN_IMAGE_FOLDER: destination folder for the human image set.
    """
    log('in gen!')
    _IMAGES_FOLDER.clearIfExists()
    HUMAN_IMAGE_FOLDER.clearIfExists()
    gen_cfg = FLAGS.cfg_cfg['gen_cfg']
    # these numbers might be lower now that I'm excluding images that aren't squares
    # nevermind. I think trying to only take squares didn't work
    cats = ['Egyptian cat',  # >=200
            'Siamese cat',  # 196
            'Persian cat',  # >=200
            'tiger cat',  # 182
            'tabby cat']  # >=100
    dogs = [
        'Afghan hound',  # >=200
        'basset hound',  # >=200
        'beagle',  # 198
        'bloodhound',  # 199
        'bluetick'  # >=100
    ]
    classes = cats + dogs
    # Classes that only ever receive test images (see the quota below).
    not_trained = ['tabby cat', 'bluetick']
    # Map every fine-grained class to its coarse 'dog' / 'cat' label.
    for d in dogs:
        nnstate.reduced_map[d] = 'dog'
    for c in cats:
        nnstate.reduced_map[c] = 'cat'
    if FLAGS.salience:
        log('in gen salience!')
        root = DATA_FOLDER.resolve('ImageNet/output_tf')
        filenames = root.glob('train*').map(lambda x: x.abspath).tolist()  # validation
        import tensorflow as tf
        ds = tf.data.TFRecordDataset(filenames)
        image_feature_description = {
            'image/height'      : tf.io.FixedLenFeature([], tf.int64),
            'image/width'       : tf.io.FixedLenFeature([], tf.int64),
            'image/colorspace'  : tf.io.FixedLenFeature([], tf.string),
            'image/channels'    : tf.io.FixedLenFeature([], tf.int64),
            'image/class/label' : tf.io.FixedLenFeature([], tf.int64),
            'image/class/synset': tf.io.FixedLenFeature([], tf.string),
            'image/class/text'  : tf.io.FixedLenFeature([], tf.string),
            'image/format'      : tf.io.FixedLenFeature([], tf.string),
            'image/filename'    : tf.io.FixedLenFeature([], tf.string),
            'image/encoded'     : tf.io.FixedLenFeature([], tf.string),
        }
        log('looping imagenet')
        _IMAGES_FOLDER[f'Training/{FLAGS.REGEN_NTRAIN}'].mkdirs()
        _IMAGES_FOLDER['Testing'].mkdirs()
        # Number of images saved so far, per fine-grained class.
        # (Inside this function ``cn`` is a class-name string.)
        class_count = {cn: 0 for cn in classes}
        for i, raw_record in enum(ds):
            example = tf.io.parse_single_example(raw_record, image_feature_description)
            if i % 100 == 0:
                log(f'on image {i}')
            classname = utf_decode(example['image/class/text'].numpy())
            for cn in classes:
                # Quota: REGEN_NTRAIN images for not_trained classes,
                # twice that for all others.
                if (cn in classname) and (
                        class_count[cn] < (FLAGS.REGEN_NTRAIN if cn in not_trained else (FLAGS.REGEN_NTRAIN * 2))):
                    log(f'saving {cn} {class_count[cn] + 1}')
                    rrr = tf.image.decode_jpeg(example['image/encoded'], channels=3).numpy()
                    # The first REGEN_NTRAIN images of a class go to Testing
                    # (per fine class); later ones go to Training (dog/cat).
                    if class_count[cn] < FLAGS.REGEN_NTRAIN:
                        _IMAGES_FOLDER['Testing'][cn][f'{i}.png'].save(rrr)
                    else:
                        _IMAGES_FOLDER[f'Training/{FLAGS.REGEN_NTRAIN}']['dog' if cn in dogs else 'cat'][
                            f'{i}.png'].save(rrr)
                    class_count[cn] += 1
                    break
            # Stop reading records once every class has reached its quota.
            break_all = True
            for cn, cc in listitems(class_count):
                if (cn in not_trained and cc != FLAGS.REGEN_NTRAIN) or (
                        cn not in not_trained and cc != (FLAGS.REGEN_NTRAIN * 2)):
                    break_all = False
            if break_all:
                break
    else:
        # Every class index in ints(np.linspace(0, 10, 6)), each in both
        # boolean variants of SymAsymClassPair.
        test_class_pairs = [
            pair for pair in chain(*[
                (
                    SymAsymClassPair(n, False),
                    SymAsymClassPair(n, True)
                ) for n in ints(np.linspace(0, 10, 6))
            ])
        ]
        class_pairs = [
            SymAsymClassPair(0, False),
            SymAsymClassPair(4, False)
        ]
        human_class_pairs = [
            SymAsymClassPair(0, False),
            SymAsymClassPair(2, False),
            SymAsymClassPair(4, False),
            SymAsymClassPair(6, False),
            SymAsymClassPair(8, False)
        ]
        gen_images(
            folder=HUMAN_IMAGE_FOLDER['TimePilot'],
            class_pairs=human_class_pairs,
            ims_per_class=10
        )
        gen_images(
            folder=_IMAGES_FOLDER['RSA'],
            class_pairs=test_class_pairs,
            ims_per_class=10,
            # ims_per_class=1
        )
        gen_images(
            folder=_IMAGES_FOLDER['Testing'],
            class_pairs=test_class_pairs,
            ims_per_class=10,
            # ims_per_class=500,
            # ims_per_class=1
        )
        # for n in (25, 50, 100, 150, 200, 1000):
        for n in (10,):
            gen_images(
                folder=_IMAGES_FOLDER['Training'][n],
                class_pairs=class_pairs,
                ims_per_class=n
            )
    log('doing thing with _temp_ims')
    # Replicate the generated images once per GPU: copy the whole folder to
    # temp/gpu1..gpuN, empty it, then move the copies back inside.
    with mlib.file.TempFolder('_temp_ims') as temp:
        log('temp_ims_1')
        if temp.exists and temp.isdir:
            temp.clear()
        log('temp_ims_2')
        temp.mkdirs()
        log('temp_ims_3')
        [_IMAGES_FOLDER.copy_to(temp[f'gpu{i + 1}']) for i in range(gen_cfg['num_gpus'])]
        log('temp_ims_4')
        _IMAGES_FOLDER.clear()
        log('temp_ims_5')
        [temp[f'gpu{i + 1}'].moveinto(_IMAGES_FOLDER) for i in range(gen_cfg['num_gpus'])]
        log('temp_ims_6')
    log('finished thing with _temp_ims')
    nn_init_fun.NRC_IS_FINISHED()  # must be invoked this way since value of function changes
def set_defaults(d, **kwargs):
    """Fill in missing keys of ``d`` in place from the given keyword defaults.

    Keys already present in ``d`` keep their current value.

    :param d: mapping to update in place.
    :param kwargs: default key/value pairs.
    """
    for key, default in listitems(kwargs):
        if key in d:
            continue
        d[key] = default
def __init__(self, *args, **attributes):
    """Store the element attributes, wrapping a dict ``style`` value.

    :param args: accepted but not used here.
    :param attributes: HTML attributes; a ``style`` value for which
        ``isdictsafe`` is true is converted to ``CSS_Style_Attribute``.
    """
    if 'style' in attributes:
        style = attributes['style']
        if isdictsafe(style):
            attributes['style'] = CSS_Style_Attribute(**style)
    self.attributes = attributes
def __repr__(self):
    """Render the style as ``'key: value; '`` declarations concatenated in order."""
    return ''.join(
        f'{prop}: {value}; '
        for prop, value in listitems(self._style)
    )
def _log_plot(log_data, savetofile, checkpoint_lines: List[str], pipeline_sections):
    """Plot a table of checkpoint times and a Sankey diagram of pipeline sections.

    Two artifacts are produced:
      * a matplotlib table of the log lines that contain one of
        ``checkpoint_lines``, saved to ``savetofile`` (only if any matched);
      * a plotly Sankey diagram of the nested pipeline sections, written as
        HTML next to ``savetofile`` with a ``_sankey`` suffix.

    :param log_data: sequence of 3-item records; the second item is searched
        for checkpoint text and the third is used as the time value.
    :param savetofile: target file of the table figure.
    :param checkpoint_lines: substrings that mark a log line as important.
    :param pipeline_sections: mapping of label to a dict with 'start',
        'end', 'thread', 'pid' and 'process'.
    """
    import matplotlib.pyplot as plt  # 1 SECOND IMPORT
    important_text = []
    important_time = []
    for lin, file_line, t in log_data:
        for cl in checkpoint_lines:
            if cl in file_line:
                important_text.append(cl)
                important_time.append(t)

    @dataclass
    class LoggedPipelineSection:
        start: float
        end: float
        thread: str
        process: str
        pid: str
        label: str
        # subsections: Optional[List] = None # does do anything yet
        index: int = -1  # MUST SET LATER
        source: Optional[int] = None  # might set later
        sourceSec: Optional = None
        x: int = 0  # set later
        time_amount: Optional[float] = None
        time_rel: Optional[float] = None
        time_amount_rel: Optional[float] = None
        y_center: Optional[float] = None
        color: str = 'orange'

    loggedSections = []
    for sec, v in listitems(pipeline_sections):
        # Sections with a falsy start or end are skipped.
        if v['start'] and v['end']:
            loggedSections.append(LoggedPipelineSection(
                start=v['start'],
                end=v['end'],
                label=sec,
                thread=v['thread'],
                pid=v['pid'],
                process=v['process']
            ))
    # The time of the last log record is taken as the total duration.
    total = log_data[-1][2]
    important_text = [shorten_str(s, 20) for s in important_text]
    fig, axs = plt.subplots(nrows=1)
    table_ax = axs
    table_ax.set_axis_off()
    important_time = [round(t, 2) for t in important_time]
    if important_time:
        table = table_ax.table(
            cellText=[[str(t)] for t in important_time],
            rowLabels=important_text,
            colLabels=['time'],
            rowColours=["palegreen"] * (len(important_text) + 1),
            colColours=["palegreen"] * 2,
            colWidths=[0.5, 0.5],
            cellLoc='center',
            loc='center'
        )
    table_ax.set_title('Important Logs', fontweight="bold")
    # NOTE(review): these four lists are filled but not read again below.
    time_amounts = []
    time_rels = []
    time_amount_rels = []
    y_centers = []
    last = 0
    for t in important_time:
        time_amounts.append(t - last)
        time_rels.append(t / total)
        time_amount_rels.append(time_amounts[-1] / total)
        y_centers.append(time_rels[-1] - (time_amount_rels[-1] / 2))
        last = t
    sizes = important_time
    # The first section is the reference span; this raises IndexError when
    # no section passed the start/end filter above.
    loggedSectionsTotal = loggedSections[0].end - loggedSections[0].start
    for i, sec in enum(loggedSections):
        sec.time_amount = sec.end - sec.start
        # no need for time_rel?
        sec.time_amount_rel = sec.time_amount / loggedSectionsTotal
        # Vertical position: section midpoint relative to the first section.
        sec.y_center = (((sec.end - (sec.time_amount / 2)) - loggedSections[0].start) / loggedSectionsTotal)
        sec.index = i
    loggedSections[0].y_center = 0.5
    # Find each section's parent: among the sections that started earlier
    # and end later, the one that started last.
    for sec in loggedSections:
        candidates = []
        for secsec in loggedSections:
            if sec.start > secsec.start:
                candidates.append(secsec)
        candidates2 = []
        for cand in candidates:
            if sec.end < cand.end:
                candidates2.append(cand)
            # NOTE(review): secsec here is the leftover variable of the loop
            # above, not cand; the branch is a no-op either way.
            elif sec.start > secsec.end:
                pass  # OVERLAP!
                # assert sec.start > secsec.end  # throws error if there is overlap but not nesting
        if candidates2:
            secsec = max(candidates2, key=lambda x: x.start)
            sec.source = secsec.index
            sec.sourceSec = secsec

    def count_recurse(sec):
        # Nesting depth = number of ancestors reachable through sourceSec.
        if sec.sourceSec:
            return 1 + count_recurse(sec.sourceSec)
        else:
            return 0

    for sec in loggedSections:
        sec.x = count_recurse(sec)
    # NOTE(review): colors is computed but not used below.
    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
    while len(colors) < len(sizes):
        colors = colors + colors
    colors = colors[:len(sizes)]
    if important_text:
        plt.savefig(savetofile.abspath)
    plt.clf()
    maxX = max([sec.x for sec in loggedSections])
    # NOTE(review): maxX is 0 when no section is nested in another, which
    # makes this division raise ZeroDivisionError — confirm inputs always nest.
    xstep = normX = 1 / maxX
    for sec in loggedSections:
        sec.x = sec.x / maxX
    labels = [sec.label for sec in loggedSections]
    # One value per section that has a parent (i.e. per Sankey link).
    values = [sec.time_amount for sec in loggedSections if sec.source is not None]
    if True:
        # NOTE(review): values[i - 1] lines up with labels[i] only if every
        # section except the first has a parent — confirm.
        for i in itr(labels):
            if i > 0:
                labels[i] = labels[i] + f' ({format_sec_dur(values[i - 1])})'
        labels[0] = labels[0] + f' ({format_sec_dur(loggedSections[0].time_amount)})'
    jitter_step = xstep / 10
    keepJittering = True
    # NOTE(review): keepJittering is reset after the first pass, so at most
    # one pair is nudged apart — confirm that a single pass is intended.
    while keepJittering:
        for sec, secsec in unique_pairs(loggedSections):
            if sec.x == secsec.x:
                if sec.thread != secsec.thread or sec.process != secsec.process or sec.pid != secsec.pid:
                    secsec.color = 'blue'
                    secsec.x += jitter_step
                    break
        keepJittering = False
    import plotly.graph_objects as go
    fig = go.Figure(data=[go.Sankey(
        # arrangement="fixed",  # no cutoff, but overlap
        arrangement="snap",  # no overlap, but cutoff
        # arrangement = "perpendicular",  # overlap and cutoff (less of both)
        # arrangement="freeform",# both overlap and cutoff
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=labels,
            y=[sec.y_center for sec in loggedSections],
            x=(arr([sec.x for sec in loggedSections]) * 1.0).tolist(),
            color=[sec.color for sec in loggedSections]
        ),
        link=dict(
            source=[sec.source for sec in loggedSections if sec.source is not None],
            # Targets are simply sections 1..n-1, in order.
            target=list(range(1, (len(loggedSections)))),
            value=values
        ))])
    fig.update_layout(
        font_size=20,
    )
    html = _get_fig(fig, full_html=True, include_plotlyjs=True)
    File(savetofile).res_pre_ext("_sankey").resrepext('html').write(html)
def __init__(self, **kwargs):
    """Expose every keyword argument as an attribute of the instance."""
    for attr_name, attr_value in listitems(kwargs):
        setattr(self, attr_name, attr_value)