def separate_comp_mat_by_classes_compared(
        normalized1: ComparisonMatrix,
        net: str,
        arch: str,
        method_name: str,
        sim_string: str,
        pattern: str
) -> StatisticalArrays:
    """Bucket pairwise class-comparison scores by symmetry-class group.

    Every pair of ``RSA_CLASSES`` on or below the diagonal is visited. The
    sub-matrix ``normalized1[c, cc]`` is flattened, its NaNs are dropped and
    the remaining values are divided by the NaN-ignoring mean of the whole
    of ``normalized1``. If both classes map to the same group (according to
    ``SYM_CLASS_SET_PATTERNS[pattern]``) the scores are added to that group,
    otherwise they are added to the ``'AC'`` group.

    :param normalized1: comparison matrix indexable by a pair of classes.
    :param net: network name, only used in the title suffix.
    :param arch: key into ``RSA_LAYERS``, only used in the title suffix.
    :param method_name: key into ``method_strings``; also shown in the title.
    :param sim_string: prefix of the y-axis label.
    :param pattern: key selecting the grouping pattern.
    :return: a ``StatisticalArrays`` holding one array of scores per group.
    """
    simsets = deepcopy(SYM_CLASS_SET_PATTERN_GROUPINGS[pattern])
    # The first grouping key is skipped when translating indices to names.
    simkeys = listkeys(SYM_CLASS_SET_PATTERN_GROUPINGS[pattern])[1:]
    for key in simsets:
        simsets[key] = arr()  # an arr rather than a list

    overall_mean = np.nanmean(normalized1)

    for row, row_class in enum(RSA_CLASSES):
        for col, col_class in enum(RSA_CLASSES):
            if col <= row:
                block = normalized1[row_class, col_class]
                finite = [v for v in flatten(block).tolist() if not isnan(v)]
                scores = arr(finite) / overall_mean
                row_group = simkeys[SYM_CLASS_SET_PATTERNS[pattern][row]]
                col_group = simkeys[SYM_CLASS_SET_PATTERNS[pattern][col]]
                # Same-group pairs go to their own bucket, all others to 'AC'.
                target = row_group if row_group == col_group else 'AC'
                simsets[target] += flatten(scores)

    y_label = f'{sim_string} Score ({method_strings[method_name]}) ({pattern_strings[pattern]})'
    suffix = f'{net}:{RSA_LAYERS[arch]} ({method_name}) ({pattern})'
    return StatisticalArrays(
        ylabel=y_label,
        xlabel='Class Comparison Group',
        data=dict(simsets.items()),
        title_suffix=suffix
    )
def cmat_html_table(self, cmat, vectorfun=None, int_thresh=None):
    """Render a comparison matrix as an HTML ``Div`` containing a table.

    :param cmat: the matrix to render (indexable as ``data[ri, ci]`` when
        ``int_thresh`` is given).
    :param vectorfun: optional callable applied to ``cmat`` before rendering.
    :param int_thresh: when not ``None``, every cell is replaced in place by
        the code of a centred ``HTML_Pre``; integer cells that reach the
        threshold are coloured blue.
    :return: the ``Div`` with a dictionary legend followed by the table.
    """

    def white_border():
        # A fresh dict per use, exactly like the literal it replaces.
        return {'border': '1px solid white'}

    data = vectorfun(cmat) if vectorfun is not None else cmat
    div = Div()

    if int_thresh is not None:
        for ri, r in enum(data):
            for ci, e in enum(r):
                style = {'text-align': 'center'}
                if isint(e) and e >= int_thresh:
                    style['color'] = 'blue'
                data[ri, ci] = HTML_Pre(str(e), style=style).getCode(None, None)

    div += HTML_Pre(self.disp_dicts(self.bedict, self.adict, self.ppdict))

    table_rows = []
    for ri, row in enum(data):
        cells = []
        for ci, e in enum(row):
            # Only cells outside the header row/column get a border.
            cell_style = white_border() if ri > 0 and ci > 0 else {}
            cells.append(DataCell(str(e), style=cell_style))
        table_rows.append(TableRow(*cells, style=white_border()))

    div += Table(*table_rows, style=white_border())
    return div
def non_daemon_process_map(fun, iterator, manager=None):
    """Apply ``fun`` to every item of ``iterator``, one non-daemon process each.

    One ``multiprocessing.Process`` (``daemon=False``) is started per item.
    Each worker stores its result in a manager dict under the item's index,
    and the parent joins all workers before returning.

    :param fun: callable taking a single item.
    :param iterator: iterable of items to process.
    :param manager: optional, already running manager used to create the
        shared dict. When ``None`` a temporary ``multiprocessing.Manager`` is
        created and shut down before returning. When a manager is supplied,
        the original sanity assertions on
        ``crunch.get_manager().PIPELINE_SECTIONS["Startup"]`` are kept, both
        in the parent before each start and inside each worker.
    :return: list of results in item order. An entry stays ``None`` if its
        worker never stored a result (each slot is pre-seeded with ``None``).
    """
    from multiprocessing import Process

    def run_with(shared_manager, check_startup):
        # Shared implementation for both the owned and the supplied manager.
        results = shared_manager.dict()

        def funfun(aa, ii):
            if check_startup:
                assert crunch.get_manager(
                ).PIPELINE_SECTIONS["Startup"] is not None
            # Use the argument handed to this worker. The previous version
            # read the enclosing loop variable instead, which only worked
            # through fork-time copying of the parent's state.
            results[ii] = fun(aa)

        procs = []
        for i, a in enum(iterator):
            results[i] = None  # pre-seed so the result order follows item order
            p = Process(target=funfun, args=(a, i), daemon=False)
            if check_startup:
                assert crunch.get_manager(
                ).PIPELINE_SECTIONS["Startup"] is not None
            p.start()
            procs.append(p)
        for p in procs:
            p.join()
        return list(results.values())

    if manager is None:
        with multiprocessing.Manager() as own_manager:
            return run_with(own_manager, check_startup=False)
    return run_with(manager, check_startup=True)
def rel_mat(fun, a1, a2=None):
    """Build the matrix of ``fun`` applied to every pair from ``a1`` and ``a2``.

    :param fun: callable taking ``(element_of_a1, element_of_a2)``.
    :param a1: sequence providing the rows.
    :param a2: sequence providing the columns; defaults to ``a1``.
    :return: ``numpy`` array of shape ``(len(a1), len(a2))`` whose entry
        ``[i, ii]`` is ``fun(a1[i], a2[ii])``.
    """
    import numpy as np

    columns = a1 if a2 is None else a2
    out = np.zeros((len(a1), len(columns)))
    for row, left in enum(a1):
        for col, right in enum(columns):
            out[row, col] = fun(left, right)
    return out
def iconfuse(self, li, lamb, identiy=True):
    """Fill a matrix with ``lamb`` applied to every pair of items in ``li``.

    :param li: the items to compare with each other.
    :param lamb: callable taking ``(row_item, column_item)``.
    :param identiy: when truthy only the lower triangle (diagonal included)
        is computed and every cell above the diagonal is set to ``0``; when
        falsy every cell is computed.
    :return: the matrix created by ``nans(len(li))`` after filling.
    """
    cmat = nans(len(li))
    for ri, r1 in enum(li):
        for ci, r2 in enum(li):
            should_compare = (not identiy) or ri >= ci
            cmat[ri, ci] = lamb(r1, r2) if should_compare else 0
    return cmat
def temp_map_filenames(self):
    """Match every new test image to an identical old one and save the indices.

    All files of ``_ImageNetTesting_old`` and ``_ImageNetTesting/unknown``
    are loaded. For each new image the index of the first old image that is
    element-wise equal is recorded. An ``AssertionError`` is raised as soon
    as a new image has no match. The index list is saved to
    ``image_net_map.p``.

    :return: ``None``
    """
    log('loading ims...')
    old_ims = [f.load() for f in Folder('_ImageNetTesting_old')]
    new_ims = [f.load() for f in Folder('_ImageNetTesting/unknown')]

    indexs = []
    for oi, new_im in enum(new_ims):
        log(f'checking new im {oi}...')
        for i, old_im in enum(old_ims):
            if not np.all(old_im == new_im):
                continue
            log(f'\tfound! @ {i}')
            indexs.append(i)
            break
        # Exactly one match must have been recorded for this image.
        assert len(indexs) == oi + 1

    File('image_net_map.p').save(indexs)
    return None
def count_split(spl):
    """Count the records per class name in one split of the ImageNet TFRecords.

    :param spl: file-name prefix of the split; files matching ``{spl}*``
        below ``ImageNet/output_tf`` in ``DATA_FOLDER`` are read.
    :return: dict mapping the decoded ``image/class/text`` to its count.
    """
    root = DATA_FOLDER.resolve('ImageNet/output_tf')
    filenames = root.glob(f'{spl}*').map(lambda x: x.abspath).tolist()
    ds = tf.data.TFRecordDataset(filenames)

    # (feature name, dtype) pairs; every feature is a scalar FixedLenFeature.
    feature_types = (
        ('image/height', tf.int64),
        ('image/width', tf.int64),
        ('image/colorspace', tf.string),
        ('image/channels', tf.int64),
        ('image/class/label', tf.int64),
        ('image/class/synset', tf.string),
        ('image/class/text', tf.string),
        ('image/format', tf.string),
        ('image/filename', tf.string),
        ('image/encoded', tf.string),
    )
    image_feature_description = {
        name: tf.io.FixedLenFeature([], dtype) for name, dtype in feature_types
    }

    data = {}
    log('looping imagenet')
    for i, raw_record in enum(ds):
        example = tf.io.parse_single_example(raw_record, image_feature_description)
        if i % 100 == 0:
            log(f'on image {i}')
        classname = utf_decode(example['image/class/text'].numpy())
        data[classname] = data.get(classname, 0) + 1
    return data
def build_net(self, FLAGS):
    """Load the network stored in ``self.file`` and locate its 'prob' output.

    ``.h5`` files are loaded with Keras (using the custom ``PoolHelper`` and
    ``LRN`` layers) into ``self.net``. For ``onnx`` files the ``onnx-tf``
    command line tool is invoked to convert the file to ``.pb``. Any other
    extension is reported through ``err``.

    When the loaded network has more than one output, ``self.OUTPUT_IDX`` is
    set to the index of the single output whose name contains ``'prob'``;
    zero or several such outputs trigger an ``AssertionError``.

    :param FLAGS: not used by this method.
    """
    import tensorflow as tf
    LRN, PoolHelper = gnet_layer_classes()

    extension = self.file.ext
    # NOTE(review): one extension is compared with a leading dot and the
    # other without - confirm which form ``ext`` actually returns.
    if extension == '.h5':
        custom_layers = {
            'PoolHelper': PoolHelper,
            'LRN': LRN
        }
        self.net = tf.keras.models.load_model(self.file.abspath, custom_objects=custom_layers)
    elif extension == 'onnx':
        onnx_tf = (
            f'{HOME}/miniconda3/envs/dnn/bin/onnx-tf'
            if ismac()
            else f'matt/miniconda3/envs/dnn/bin/onnx-tf'
        )
        out = self.file.res_pre_ext("pb")
        # Equivalent of: onnx-tf convert -i /path/to/input.onnx -o /path/to/output.pb
        eshell(f'{onnx_tf} convert -i {self.file.abspath} -o {out.abspath}')
        # NOTE(review): this branch does not assign self.net, yet it is read
        # right below - verify whether the converted model should be loaded.
    else:
        err('')

    outputs = self.net.outputs
    if len(outputs) > 1:
        found = False
        for position, output in enum(outputs):
            if 'prob' not in output.name:
                continue
            assert not found  # a second 'prob' output is not expected
            self.OUTPUT_IDX = position
            found = True
        assert found
def input_gen():
    """Yield the decoded RGB image of every record in ``ds``.

    As a side effect the label of record ``i`` is written to
    ``r['tf']['y_true'][i]`` and the decoded image is cached in ``imap[i]``.
    ``ds``, ``r``, ``imap`` and ``image_feature_description`` come from the
    enclosing scope.
    """
    for i, raw_record in enum(ds):
        example = tf.io.parse_single_example(raw_record, image_feature_description)
        r['tf']['y_true'][i] = example['image/class/label'].numpy()
        decoded = tf.image.decode_jpeg(example['image/encoded'], channels=3)
        pixels = decoded.numpy()
        imap[i] = pixels
        yield pixels
def _after_thing(datagen, nam):
    """Save a 100x100x3 resampled PNG of every example of ``datagen``.

    :param datagen: object whose ``examples()`` yields items where index 0 is
        passed to ``save_dnn_data`` as the third argument and index 1 is the
        image to resample.
    :param nam: passed to ``save_dnn_data`` as the second argument.
    """
    log('saving examples')
    for _, ex in enum(datagen.examples()):
        # Resample first: saving large images was taking up to 3 seconds.
        small = resampleim(ex[1], 100, 100, 3)
        save_dnn_data(small, nam, ex[0], 'png')
    log('finished saving examples')
def run_and_save_rsa(nam, mat1, layer_name=None, layer_i=None):
    """Compute the ``rsa_norm`` comparison of ``mat1`` and save it as a figure.

    ``clas_set``, ``y_true`` and ``ei`` are taken from the enclosing scope.

    :param nam: name of the data being compared; used in the title, and the
        value ``'Inter'`` adds the layer information to it.
    :param mat1: data handed to ``FeatureMatrix``.
    :param layer_name: layer name shown in the title for ``'Inter'``.
    :param layer_i: layer index shown in the title for ``'Inter'``.
    """
    index_to_cn = {index: name for name, index in TEST_CLASS_MAP.items()}
    # NOTE(review): the classes are built from the position in y_true, the
    # label value itself is not used - confirm this is intended.
    classes = [Class(index_to_cn[iii], iii) for iii, yt in enum(y_true)]

    feature_matrix = FeatureMatrix(mat1, clas_set, classes)
    feature_matrix.sort_by_class_name()
    fd = feature_matrix.compare(rsa_norm).image_plot()

    tit = f'L2-{nam}'
    epoch_label = f'CM{ei + 1}'
    fd.title = f'{tit} ({nnstate.FLAGS.arch}{nnstate.FLAGS.ntrain}E{ei + 1})'
    if nam == 'Inter':
        fd.title = f'{fd.title}(Layer{layer_i}:{layer_name})'
    save_dnn_data(fd, tit, epoch_label, 'mfig')
def get_or_set_default(self, default, *keys):
    """Follow ``keys`` into this mapping, storing ``default`` if the last is missing.

    :param default: value stored (and returned) when the final key is absent.
    :param keys: path of keys, starting at ``self``.
    :return: the value found at the end of the path, or ``default`` right
        after storing it. Storing also calls ``self.push()`` unless
        ``self.just_sync_at_end`` is set. A missing intermediate key is
        reported through ``err``.
    """
    node = self
    final_position = len(keys) - 1
    for position, key in enum(keys):
        if key in listkeys(node):
            node = node[key]
            continue
        if position != final_position:
            # Only the last key of the path may be created implicitly.
            err(f'need to set root default first: {key}')
            continue
        node[key] = default
        if not self.just_sync_at_end:
            self.push()
        return default
    return node
def recurse_rep_itr(l, o, n, use_is=False):
    """Replace occurrences of ``o`` by ``n`` in ``l``, recursing into children.

    :param l: mutable sequence, modified in place.
    :param o: the value to look for.
    :param n: the replacement.
    :param use_is: compare by identity (``is``) instead of equality (``==``).
    :return: tuple ``(l, count)`` where ``count`` is the number of
        replacements, including those reported by nested calls.
    """
    replaced = 0
    for idx, e in enum(l):
        if is_non_str_itr(e):
            e, nested = recurse_rep_itr(e, o, n, use_is=use_is)
            replaced += nested
        matches = (e is o) if use_is else (e == o)
        if matches:
            l[idx] = n
            replaced += 1
        elif isdict(e):
            l[idx], nested = recurse_rep_dict(e, o, n, use_is=use_is)
            replaced += nested
    return l, replaced
def my_except_hook(exctype, value, tb):
    """Exception hook that prints a (partly trimmed) traceback and may exit.

    Takes the ``(type, value, traceback)`` triple of an exception hook.

    NOTE(review): the nesting below was reconstructed from a flattened
    source; confirm the placement of the RelayException check and of the
    auto-quit section against the original file.
    """
    if False:
        # Disabled: collecting the exceptions.
        exceptions.append((exctype, value, tb))
    if exctype != Short_MException:
        listing = traceback.format_exception(exctype, value, tb)
        if False:  # DEBUG
            # if not ismac():
            # Disabled: rewriting remote paths in the traceback to local ones.
            for i, line in enum(listing):
                if line.startswith(' File "'):
                    linux_path = line.split('"')[1]
                    mac_path = pwd() + linux_path
                    listing[i] = listing[i].replace(linux_path, mac_path)
                    from mlib.proj.struct import REMOTE_CWD
                    listing[i] = listing[i].replace(REMOTE_CWD, '')
        if exctype == MException:
            # Drop the second to last entry, cut the new second to last entry
            # to its first line and strip the final character of the last one.
            del listing[-2]
            listing[-2] = listing[-2].split('\n')[0] + '\n'
            listing[-1] = listing[-1][0:-1]
            print("".join(listing), file=sys.stderr)
            print('ERROR ERROR ERROR')
            # coming back here after a long time so... (these werent here before)
            print(value)
            for line in listing:
                print(line)
            # exit_for_real(1) #bad. with this, one exception thrown in pdb caused process to exit
            sys.__excepthook__(exctype, value, tb)
        else:
            sys.__excepthook__(exctype, value, tb)
            sys.exit(1)
    if exctype == RelayException:
        # Only the message is shown for relayed exceptions.
        print(value)
    else:
        sys.__excepthook__(exctype, value, tb)
    if auto_quit_on_exception:
        print(ERROR_EXIT_STR)
        # sys.exit(1)
        # I think I've mismanaged all my threads...
    if auto_quit_on_exception:
        # os._exit is used here in place of the commented-out sys.exit above.
        import os; os._exit(1)
def confuse_analysis(self, data, lamb, identiy=True):
    """Compare every backend/arch/preprocessing result with every other one.

    :param data: nested mapping ``backend -> arch -> preprocessing -> result``
        where each result provides ``'y_pred'`` and ``'acts'``. The top-level
        keys ``'files'``, ``'dest'`` and ``'y_true'`` are ignored.
    :param lamb: pairwise comparison callable, forwarded to ``iconfuse``.
    :param identiy: forwarded to ``iconfuse``.
    :return: list of rows; the first row is ``[None] + labels`` and every
        following row starts with its own label.
    """

    @dataclass
    class IN_Result:
        backend: str
        arch: str
        pp: str
        y_pred: np.ndarray
        acts: np.ndarray

        def __str__(self):
            # Label made of the display names of backend, arch and pp.
            return (
                f'{SanityAnalysis.bedict[self.backend]}'
                f'{SanityAnalysis.adict[self.arch]}'
                f'{SanityAnalysis.ppdict[self.pp]}'
            )

    non_backend_keys = ['files', 'dest', 'y_true']
    in_results = []
    for bekey, bedata in listitems(data):
        if bekey not in non_backend_keys:
            for akey, arch_data in listitems(bedata):
                for ppkey, ppdata in listitems(arch_data):
                    in_results.append(IN_Result(
                        backend=bekey,
                        arch=akey,
                        pp=ppkey,
                        y_pred=arr(ppdata['y_pred']),
                        acts=arr(ppdata['acts'])
                    ))

    matrix = self.iconfuse(in_results, lamb, identiy=identiy)
    labels = listmap(lambda res: str(res), in_results)

    header_row = [None] + labels
    body_rows = [[labels[i]] + row for i, row in enum(matrix.tolist())]
    return [header_row] + body_rows
def gen_main(FLAGS, _IMAGES_FOLDER, HUMAN_IMAGE_FOLDER):
    """Regenerate the image folders and replicate them once per GPU.

    Both folders are cleared first. With ``FLAGS.salience`` set, images of
    five cat and five dog classes are extracted from the ImageNet training
    TFRecords: for every class the first ``FLAGS.REGEN_NTRAIN`` hits are
    saved under ``Testing/<class>`` and the following ``REGEN_NTRAIN`` hits
    under ``Training/<REGEN_NTRAIN>/dog|cat`` (classes in ``not_trained``
    only receive the testing images). Otherwise ``gen_images`` is used to
    produce the human, RSA, testing and training sets.

    :param FLAGS: run configuration; reads ``cfg_cfg['gen_cfg']``,
        ``salience`` and ``REGEN_NTRAIN``.
    :param _IMAGES_FOLDER: destination folder of the network images.
    :param HUMAN_IMAGE_FOLDER: destination folder of the human-experiment images.

    NOTE(review): nesting reconstructed from a flattened source - the
    temp-folder section is assumed to run for both branches; confirm.
    """
    log('in gen!')
    _IMAGES_FOLDER.clearIfExists()
    HUMAN_IMAGE_FOLDER.clearIfExists()
    gen_cfg = FLAGS.cfg_cfg['gen_cfg']

    # these numbers might be lower now that I'm excluding images that aren't squares
    # nevermind. I think trying to only take squares didn't work
    cats = ['Egyptian cat',  # >=200
            'Siamese cat',  # 196
            'Persian cat',  # >=200
            'tiger cat',  # 182
            'tabby cat']  # >=100
    dogs = [
        'Afghan hound',  # >=200
        'basset hound',  # >=200
        'beagle',  # 198
        'bloodhound',  # 199
        'bluetick'  # >=100
    ]
    classes = cats + dogs
    # Classes that only get testing images (see the per-class limit below).
    not_trained = ['tabby cat', 'bluetick']
    for d in dogs:
        nnstate.reduced_map[d] = 'dog'
    for c in cats:
        nnstate.reduced_map[c] = 'cat'

    if FLAGS.salience:
        log('in gen salience!')
        root = DATA_FOLDER.resolve('ImageNet/output_tf')
        # 'train*' selects the training shards (the alternative is 'validation').
        filenames = root.glob('train*').map(lambda x: x.abspath).tolist()
        import tensorflow as tf
        ds = tf.data.TFRecordDataset(filenames)

        # Schema used to parse each serialized record; the bounding-box
        # features are not parsed.
        image_feature_description = {
            'image/height': tf.io.FixedLenFeature([], tf.int64),
            'image/width': tf.io.FixedLenFeature([], tf.int64),
            'image/colorspace': tf.io.FixedLenFeature([], tf.string),
            'image/channels': tf.io.FixedLenFeature([], tf.int64),
            'image/class/label': tf.io.FixedLenFeature([], tf.int64),
            'image/class/synset': tf.io.FixedLenFeature([], tf.string),
            'image/class/text': tf.io.FixedLenFeature([], tf.string),
            'image/format': tf.io.FixedLenFeature([], tf.string),
            'image/filename': tf.io.FixedLenFeature([], tf.string),
            'image/encoded': tf.io.FixedLenFeature([], tf.string),
        }

        log('looping imagenet')

        _IMAGES_FOLDER[f'Training/{FLAGS.REGEN_NTRAIN}'].mkdirs()
        _IMAGES_FOLDER['Testing'].mkdirs()

        # Number of images saved so far for every class.
        class_count = {cn: 0 for cn in classes}

        for i, raw_record in enum(ds):
            example = tf.io.parse_single_example(raw_record, image_feature_description)
            if i % 100 == 0:
                log(f'on image {i}')
            classname = utf_decode(example['image/class/text'].numpy())
            for cn in classes:
                # Limit: REGEN_NTRAIN for not-trained classes, twice that otherwise.
                if (cn in classname) and (
                        class_count[cn] < (FLAGS.REGEN_NTRAIN if cn in not_trained else (FLAGS.REGEN_NTRAIN * 2))):
                    log(f'saving {cn} {class_count[cn] + 1}')
                    rrr = tf.image.decode_jpeg(example['image/encoded'], channels=3).numpy()
                    if class_count[cn] < FLAGS.REGEN_NTRAIN:
                        # The first REGEN_NTRAIN images of a class are testing images.
                        _IMAGES_FOLDER['Testing'][cn][f'{i}.png'].save(rrr)
                    else:
                        _IMAGES_FOLDER[f'Training/{FLAGS.REGEN_NTRAIN}']['dog' if cn in dogs else 'cat'][
                            f'{i}.png'].save(rrr)
                    class_count[cn] += 1
                    break
            # Stop reading records once every class has reached its limit.
            break_all = True
            for cn, cc in listitems(class_count):
                if (cn in not_trained and cc != FLAGS.REGEN_NTRAIN) or (
                        cn not in not_trained and cc != (FLAGS.REGEN_NTRAIN * 2)):
                    break_all = False
            if break_all:
                break
    else:
        # Symmetric and asymmetric pair for each of 6 values between 0 and 10.
        test_class_pairs = [
            pair for pair in chain(*[
                (
                    SymAsymClassPair(n, False),
                    SymAsymClassPair(n, True)
                ) for n in ints(np.linspace(0, 10, 6))
            ])
        ]
        class_pairs = [
            SymAsymClassPair(0, False),
            SymAsymClassPair(4, False)
        ]
        human_class_pairs = [
            SymAsymClassPair(0, False),
            SymAsymClassPair(2, False),
            SymAsymClassPair(4, False),
            SymAsymClassPair(6, False),
            SymAsymClassPair(8, False)
        ]
        gen_images(
            folder=HUMAN_IMAGE_FOLDER['TimePilot'],
            class_pairs=human_class_pairs,
            ims_per_class=10
        )
        gen_images(
            folder=_IMAGES_FOLDER['RSA'],
            class_pairs=test_class_pairs,
            ims_per_class=10,
        )
        gen_images(
            folder=_IMAGES_FOLDER['Testing'],
            class_pairs=test_class_pairs,
            ims_per_class=10,
        )
        # for n in (25, 50, 100, 150, 200, 1000):
        for n in (10,):
            gen_images(
                folder=_IMAGES_FOLDER['Training'][n],
                class_pairs=class_pairs,
                ims_per_class=n
            )

    # Replicate the generated images: one 'gpu<k>' copy per configured GPU
    # is built in a temp folder and then moved back into the emptied folder.
    log('doing thing with _temp_ims')
    with mlib.file.TempFolder('_temp_ims') as temp:
        log('temp_ims_1')
        if temp.exists and temp.isdir:
            temp.clear()
        log('temp_ims_2')
        temp.mkdirs()
        log('temp_ims_3')
        [_IMAGES_FOLDER.copy_to(temp[f'gpu{i + 1}']) for i in range(gen_cfg['num_gpus'])]
        log('temp_ims_4')
        _IMAGES_FOLDER.clear()
        log('temp_ims_5')
        [temp[f'gpu{i + 1}'].moveinto(_IMAGES_FOLDER) for i in range(gen_cfg['num_gpus'])]
        log('temp_ims_6')
    log('finished thing with _temp_ims')
    nn_init_fun.NRC_IS_FINISHED()  # must be invoked this way since value of function changes
def report(self, signal, t, exps):
    """Build the job/GPU/memory/CPU status report and emit it through ``signal``.

    Nothing is reported for non-remote runs. Otherwise the report text is
    assembled step by step: the status of every experiment, then the output
    of shell commands sent through ``self.statusP``. During the very first
    report every intermediate state is emitted so the receiver fills in
    progressively; afterwards only the finished report is emitted.

    :param signal: object with an ``emit(str)`` method, or ``None``.
    :param t: current time value; ``self.next_report['']`` is set to ``t + 1``.
    :param exps: experiments, each providing ``status()``.

    NOTE(review): nesting reconstructed from a flattened source - confirm
    the early-return block and the emit that follows the experiment loop.
    """
    if not self.remote:
        if False:  # DEBUG
            if signal is not None:
                signal.emit('no local report yet')
            else:
                log('no local report yet')
        return
    the_report = '\n\n\t\t\t~~JOB REPORT~~'
    if self.first_report[''] and signal is not None:
        signal.emit(the_report)
    for e in exps:
        the_report += f'\n{e.status()}'
    if self.first_report[''] and signal is not None:
        signal.emit(the_report)
    the_report += '\n\n'
    if self.first_report[''] and signal is not None:
        signal.emit(the_report)
    print(the_report)

    while True:  # clear buffer
        line = self.statusP.readline_nonblocking(1)
        if line is None:
            break

    log('GETTING GPU REPORT')
    gpu_report = '\n\t\t\t~~GPU REPORT~~'
    # The echo after each command marks the end of its output
    # (the loops below stop at the line containing DONE_STR).
    self.statusP.sendline(f'nvidia-smi; echo ${self.DONE_VAR}')
    tesla_line = False
    percents = []
    while True:
        line = self.statusP.readline_nonblocking(1)
        if line is None or self.DONE_STR in line:
            break
        else:
            # The line following a 'Tesla P4' line carries the percentage:
            # the two characters before its first '%' are parsed.
            if tesla_line:
                percents += [int(line.split('%')[0][-2:])]
            tesla_line = 'Tesla P4' in line
    for idx, perc in enum(percents):
        gpu_report += f'\n{idx}\t{insertZeros(perc, 2)}% {Progress.prog_bar(perc, BAR_LENGTH=self.REP_BAR_LENGTH)}'
    the_report += gpu_report
    if self.first_report[''] and signal is not None:
        signal.emit(the_report)

    log('GETTING MEM REPORT')
    mem_report = '\n\n\t\t\t~~MEM REPORT~~'
    self.statusP.sendline(f'free -h; echo ${self.DONE_VAR}')
    log('send mem report request')
    while True:
        line = self.statusP.readline_nonblocking(1)
        if line is None or self.DONE_STR in line:
            break
        else:
            # Lines containing MATT_STR are left out of the report.
            if self.MATT_STR not in line:
                mem_report += f'\n{line}'
    the_report += mem_report
    if self.first_report[''] and signal is not None:
        signal.emit(the_report)

    log('\nGETTING CPU REPORT')
    cpu_report = '\n\n\t\t\t~~CPU REPORT~~'
    # One-liner printing 'CPU <x>% RAM <y>% HDD <z>%'.
    # NOTE(review): '\(' and '\/' are invalid escape sequences in a non-raw
    # string literal and trigger a warning on recent Python versions.
    self.statusP.sendline(
        f'''echo "CPU `LC_ALL=C top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\\1/" | awk '{{print 100 - $1}}'`% RAM `free -m | awk '/Mem:/ {{ printf("%3.1f%%", $3/$2*100) }}'` HDD `df -h / | awk '/\// {{print $(NF-1)}}'`"; echo ${self.DONE_VAR}'''
    )
    log('SENT CPU LINE')
    the_report += cpu_report
    cpu_report = ''
    while True:
        line = self.statusP.readline_nonblocking(1)
        if line is None or self.DONE_STR in line:
            break
        else:
            if self.MATT_STR not in line:
                cpu_report += f'\n{line}'
    # Expected after splitting on spaces: six fields, label/value alternating.
    cpu_stuff = tuple(
        listmap(lambda s: s.replace('%', ''), cpu_report.strip().split(' ')))
    if len(cpu_stuff) == 6:
        cpu_perc = float(cpu_stuff[1])
        ram_perc = float(cpu_stuff[3])
        hdd_perc = float(cpu_stuff[5])
        cpu_report = f'\nCPU\t{insertZeros(cpu_perc, 4)}% {Progress.prog_bar(cpu_perc, BAR_LENGTH=self.REP_BAR_LENGTH)}'
        cpu_report += f'\nRAM\t{insertZeros(ram_perc, 4)}% {Progress.prog_bar(ram_perc, BAR_LENGTH=self.REP_BAR_LENGTH)}'
        cpu_report += f'\nHDD\t{insertZeros(hdd_perc, 4)}% {Progress.prog_bar(hdd_perc, BAR_LENGTH=self.REP_BAR_LENGTH)}'
        the_report += cpu_report
    else:
        the_report += f'problem getting cpu_report ({len(cpu_stuff)=})'

    log('sending signal with REPORT')
    if signal is not None:
        signal.emit(the_report)
    log('sent signal with REPORT')
    self.next_report[''] = t + 1
    self.first_report[''] = False
def table(cls, fd, force_wolf=False, debug=False):
    """Draw ``fd`` as a table or as a colour-coded confusion matrix.

    Two backends share this code: the matplotlib backend (``cls`` is
    ``MPLFigsBackend``), which draws on ``cls.fig`` / ``cls.ax``, and the
    "wolf" backend (class name contains ``'Wolf'`` or ``force_wolf``), which
    builds graphics objects and returns them.

    :param fd: figure description; reads among others ``data``, ``confuse``,
        ``confuse_min``, ``confuse_max``, ``show_nums``, ``y_log_scale``,
        ``headers_included``, ``block_len``, ``row_headers``, ``title``,
        ``title_size``, ``fontsize`` and the header labels. ``fd.data`` is
        modified in place.
    :param force_wolf: use the wolf code path regardless of the class name.
    :param debug: not read by this method (``cls.debug`` is used instead).
    :return: the wolf graphics object when the wolf path is active,
        otherwise ``None``.

    NOTE(review): nesting reconstructed from a flattened source. ``rrr`` is
    both a loop variable and the returned graphics object; on the wolf path
    it is only assigned a graphics object inside the blocked-confusion
    section - confirm the other wolf cases.
    """
    from mlib.wolf.wolf_lang import wlexpr
    from matplotlib import pyplot as plt  # 1 FULL SECOND IMPORT
    wolf = 'Wolf' in cls.__name__ or force_wolf
    if wolf:
        from mlib.wolf.wolf_figs import addHeaderLabels, LinePlotGrid, OneWayOfShowingARaster
    if cls == MPLFigsBackend:
        if BLACK_FIGS:
            cls.fig = plt.figure(figsize=(16, 12), facecolor='black')
        else:
            cls.fig = plt.figure(figsize=(16, 12))
        if BLACK_FIGS:
            cls.ax = cls.fig.add_subplot(111, facecolor='black')
        else:
            cls.ax = cls.fig.add_subplot(111)
        cls.ax.axis("off")
        cls.tabl = None
    data = fd.data
    backgrounds = None
    if fd.confuse:
        low = fd.confuse_min
        high = fd.confuse_max
        scaleBits = JET().tolist()
        show_nums = fd.show_nums
        from copy import deepcopy
        # Same shape as data; every cell is overwritten with a colour below.
        backgrounds = deepcopy(data)
        for rrr in itr(data):
            for c in itr(data[rrr]):
                # Default background: black or white depending on the theme.
                if BLACK_FIGS:
                    backgrounds[rrr][c] = cls.color(0, 0, 0)
                else:
                    backgrounds[rrr][c] = cls.color(1, 1, 1)  # 256?
                if isnan(data[rrr][c]):
                    if wolf:
                        data[rrr][c] = ''
                        backgrounds[rrr][c] = cls.none()
                    else:
                        data[rrr][c] = [0, 0, 0]
                        backgrounds[rrr][c] = cls.color(0, 0, 0)
                elif not isstr(data[rrr][c]):
                    dat = data[rrr][c]
                    # b is the value relative to ``high`` (squared on both
                    # sides when fd.y_log_scale is set).
                    if isnan(dat):  # wait! this dealt with above
                        b = None
                    elif high != 0:
                        if fd.y_log_scale:
                            b = ((dat**2) / (high**2))
                        else:
                            b = (dat / high)
                            if cls.debug:  # and data == 0:
                                breakpoint()
                    else:
                        if fd.y_log_scale:
                            # b = np.log10(b)
                            b = dat**2
                        else:
                            b = dat
                    if show_nums:
                        if b is None:
                            data[rrr][c] = 'NaN'
                        else:
                            data[rrr][c] = sigfig(dat, 2)
                    else:
                        if b is None:  # NaN
                            breakpoint()
                            data[rrr][c]
                        else:
                            # Indexing with round(b * 256) - 1 caused zeros to show wrong!
                            data[rrr][c] = JET()[round(b * 255)].tolist()
                    # Header cells keep the default background.
                    if (fd.headers_included and rrr > 0 and c > 0) or not fd.headers_included:
                        backgrounds[rrr][c] = cls.color(0, 0, b)
    block_len = fd.block_len
    if block_len is not None and fd.headers_included is False:
        # NOTE(review): divs is built but not read again in this method.
        divs = [[], []]
        for i in range(len(data)):
            if i == 1 or i % block_len == 0:
                divs[0] += [True]
                divs[1] += [True]
            else:
                divs[0] += [False]
                divs[1] += [False]
    if not fd.confuse or fd.headers_included:
        if cls == MPLFigsBackend and not fd.headers_included:
            # Plain text table drawn by matplotlib.
            tbl = cls.ax.table(
                cellText=data,
                fontsize=fd.fontsize,
                loc='center')
            tbl.auto_set_font_size(False)
            tbl.set_fontsize(fd.fontsize)
            tbl.scale(1, (fd.fontsize + 10) / 10)
            tbl.auto_set_column_width(list(range(len(data[0]))))
        else:
            breakpoint()
            for ri, row in enumerate(data):
                for ci, el in enumerate(row):
                    if cls == MPLFigsBackend:
                        if cls.tabl is None:
                            cls.tabl = cls.ax.table([[1]], loc='center')
                        # Equal cell sizes, except fixed sizes for the first
                        # row (height) and the first column (width).
                        width = 1 / len(data[0])
                        height = 1 / len(data)
                        if ri == 0:
                            height = 0.05
                        if ci == 0:
                            width = 0.1
                        cell = cls.tabl.add_cell(
                            ri,
                            ci,
                            width,
                            height,
                            loc="center",
                            text=str(el) if el != 'Non' else "",
                            **({
                                'facecolor': backgrounds[ri][ci]
                            } if backgrounds is not None else {}))
                        cell.get_text().set_fontsize(20)
                        if len(data) > 5:
                            cell.get_text().set_fontsize(10)
                        if BLACK_FIGS:
                            cell.get_text().set_color('white')
                    else:
                        data[ri][ci] = cls.tableItem(
                            el, backgrounds[ri][ci])
            if fd.top_header_label is not None or fd.side_header_label is not None:
                if wolf:
                    data = addHeaderLabels(data, fd.top_header_label,
                                           fd.side_header_label).tolist()
                else:
                    cls.tabl.auto_set_font_size(False)
                    h = cls.tabl.get_celld()[(0, 0)].get_height()
                    w = cls.tabl.get_celld()[(0, 0)].get_width()
                    # Create an additional Header
                    # (a row of edge-less cells above the table; the label
                    # text is put into one of them)
                    weird = fd.top_header_label * 4
                    weird = fd.top_header_label
                    header = [
                        cls.tabl.add_cell(
                            -1,
                            pos,
                            w,
                            h,
                            loc="center",
                        ) for pos in range(1,
                                           len(data[0]) + 1)
                    ]
                    if len(header) > 2:
                        for idx, head in enum(header):
                            if idx == 0:
                                head.visible_edges = ""
                            elif idx == len(header) - 1:
                                head.visible_edges = ""
                            else:
                                head.visible_edges = ""
                        header[1].get_text().set_text(weird)
                        header[1].set_fontsize(40.0)
                    elif len(header) == 2:
                        header[0].visible_edges = 'TBL'
                        header[1].visible_edges = 'TBR'
                        header[1].get_text().set_text(weird)
                    else:
                        header[0].visible_edges = 'TBLR'
                        header[0].get_text().set_text(weird)
                    # Create an additional Header
                    # (the same for a column of cells left of the table)
                    weird = fd.side_header_label * 4
                    weird = fd.side_header_label
                    header = [
                        cls.tabl.add_cell(pos,
                                          -1,
                                          w,
                                          h,
                                          loc="center",
                                          facecolor="none")
                        for pos in range(0,
                                         len(data) + 1)
                    ]
                    if len(header) > 2:
                        for idx, head in enum(header):
                            # Each first assignment is overwritten right away:
                            # no edges are drawn.
                            if idx == 0:
                                head.visible_edges = "LTR"
                                head.visible_edges = ""
                            elif idx == len(header) - 1:
                                head.visible_edges = "LRB"
                                head.visible_edges = ""
                            else:
                                head.visible_edges = 'LR'
                                head.visible_edges = ""
                        header[1].get_text().set_text(weird)
                        header[1].set_fontsize(40.0)
                    elif len(header) == 2:
                        header[0].visible_edges = 'TLR'
                        header[1].visible_edges = 'BLR'
                        header[1].get_text().set_text(weird)
                        header[1].set_fontsize(40.0)
                    else:
                        header[0].visible_edges = 'TBLR'
                        header[0].get_text().set_text(weird)
                        header[0].set_fontsize(40.0)
            if cls != MPLFigsBackend:
                insets = [
                    Inset(
                        Rasterize(Grid(
                            data,
                            Dividers(False),
                        ), RasterSize(), ImageSize(), Background()))
                ]
    if fd.confuse and fd.block_len is not None and fd.block_len > 1:
        # Blocked confusion matrix: raster image, colour scale, block
        # grid lines and one tick label per block.
        if fd.confuse_is_identical:
            # Blank the cells above the diagonal.
            for rrr in itr(data):
                for c in itr(data[0]):
                    if c > rrr:
                        if BLACK_FIGS:
                            data[rrr][c] = [0, 0, 0]
                        else:
                            data[rrr][c] = [1, 1, 1]  # 256?
        if cls != MPLFigsBackend:
            scale = Graphics(
                [
                    Raster(scaleBits),
                    Inset(Text(round(low), fontSize=30), [0.5, -1]),
                    Inset(Text(round(high), fontSize=30), [0.5, 21]),
                ],
                ImagePadding([[75, 75], [20, 20]]),
            )
        else:
            # create an axes on the right side of ax. The width of cax will be 5%
            # of ax and the padding between cax and ax will be fixed at 0.05 inch.
            from mpl_toolkits.axes_grid1 import make_axes_locatable  # 1 FULL SECOND IMPORT
            divider = make_axes_locatable(cls.ax)
            cax = divider.append_axes("right", size="5%", pad=0.05)
            from matplotlib.colors import LinearSegmentedColormap  # 1 FULL SECOND IMPORT
            cmap = LinearSegmentedColormap.from_list(
                'jet',
                li(scaleBits),
                N=len(scaleBits)
            )
            import matplotlib
            sm = matplotlib.cm.ScalarMappable(norm=None, cmap=cmap)
            # Ticks are placed on a 0..1 scale and relabelled with values
            # between low and high right below.
            cbar = cls.fig.colorbar(
                sm,
                cax=cax,
                orientation='vertical',
                ticks=np.linspace(
                    0,
                    1,
                    num=4))
            base_scale_ticks = np.linspace(low, high, num=4)
            if fd.y_log_scale:
                base_scale_ticks = [bst**2 for bst in base_scale_ticks]
            cbar.ax.set_yticklabels(
                [sigfig(n, 3) for n in base_scale_ticks])
        gl = len(data)
        nt = len(fd.row_headers)
        # One grid line at the end of every block; labels sit half a block
        # before their line.
        line_values = np.linspace(fd.block_len, fd.block_len * nt, nt)
        half = fd.block_len / 2
        labellocs_labels = ziplist(line_values - half, fd.row_headers)
        # ticks start from the bottom but I want these to start from the top
        if wolf:
            gridlines = LinePlotGrid(line_values,
                                     triangle=fd.confuse_is_identical)
        else:
            listpoints = []
            for i in line_values:
                i = i - 0.5
                if fd.confuse_is_identical:
                    # Lines are clipped at the diagonal.
                    listpoints += [[[i, gl - 0.5], [i, i]]]
                    listpoints += [[[i, i], [-0.5, i]]]
                else:
                    listpoints += [[[i, -0.5], [i, gl - 0.5]]]
                    listpoints += [[[-0.5, i], [gl - 0.5, i]]]
            listpoints = arr(listpoints)
            for sub in listpoints:
                cls.ax.plot(sub[:, 0], sub[:, 1], 'k--')
        # rasters start from the bottom but I want this to start from the top
        if wolf:
            rast = OneWayOfShowingARaster(Raster(reversed(data)), gl)
        else:
            cls.ax.imshow(list(data))
        x_ticks = []
        xt_mpl_t = []
        xt_mpl_l = []
        y_ticks = []
        yt_mpl_t = []
        yt_mpl_l = []
        for t in labellocs_labels:
            if wolf:
                x_ticks += [
                    Text(
                        t[1],
                        coords=[t[0] / gl, -.02],
                        direction=[1, 0],
                        fontSize=DEFAULT_TICK_SIZE,
                        offset=[0, 0],
                    )
                ]
            xt_mpl_t += [t[0] / gl]
            xt_mpl_l += [t[1]]
        for t in labellocs_labels:
            if wolf:
                y_ticks += [
                    Text(
                        t[1],
                        # y ticks need to be reversed
                        coords=[-.01, 1 - (t[0] / gl)],
                        direction=[1, 0],
                        fontSize=DEFAULT_TICK_SIZE,
                        offset=[1, 0],
                    )
                ]
            # y ticks not reversed for mpl?
            yt_mpl_t += [(t[0] / gl)]
            yt_mpl_l += [t[1]]
        if wolf:
            # This replaces any insets list built earlier in the method.
            insets = [
                Inset(obj=Rasterize(scale),
                      pos=[1.2, 0],
                      opos=[Center, Bottom]),
                Inset(obj=Rasterize(
                    rast,
                    ImageResolution(),
                ),
                    opos=[Left, Bottom]),
                Inset(
                    obj=Rasterize(gridlines, ImageResolution(),
                                  Background(wlexpr('None'))),
                    opos=[Left, Bottom],
                    background=Background(
                        # wl.Red
                        wlexpr('None')))
            ]
            [insets.extend(ticks) for ticks in zip(x_ticks, y_ticks)]
            insets += [
                Inset(Rasterize(
                    Text(fd.title,
                         fontSize=(40 if fd.headers_included else 20)
                         if fd.title_size is None else fd.title_size),
                    Background(wlexpr('None'))),
                    scale=(1, 1),
                    pos=Scaled([0.5, 2]),
                    background=Background(wlexpr('None')))
            ]
            rrr = Graphics(insets)
        else:
            title_obj = cls.ax.set_title(fd.title,
                                         fontsize=fd.title_size / 3)
            from matplotlib import pyplot as plt  # 1 FULL SECOND IMPORT
            plt.setp(title_obj, color=text_color)
            cls.ax.axis(True)
            cls.ax.spines['left'].set_color(text_color)
            cls.ax.spines['bottom'].set_color(text_color)
            cls.ax.xaxis.label.set_color(text_color)
            cls.ax.yaxis.label.set_color(text_color)
            cls.ax.tick_params(axis='x', colors=text_color)
            cls.ax.tick_params(axis='y', colors=text_color)
            # Tick positions were stored relative to gl; scale them back.
            cls.ax.set_xticks(arr(xt_mpl_t) * gl)
            cls.ax.set_xticklabels(xt_mpl_l, rotation=90)
            cls.ax.set_yticks(arr(yt_mpl_t) * gl)
            cls.ax.set_yticklabels(yt_mpl_l)
            cax.axis(True)
            cax.spines['left'].set_color(text_color)
            cax.spines['bottom'].set_color(text_color)
            cax.spines['top'].set_color(text_color)
            cax.spines['right'].set_color(text_color)
            cax.xaxis.label.set_color(text_color)
            cax.yaxis.label.set_color(text_color)
            cax.tick_params(axis='x', colors=text_color)
            cax.tick_params(axis='y', colors=text_color)
    if not wolf and (fd.confuse and fd.block_len is not None
                     and fd.block_len > 1) == False:
        # matplotlib title for everything that is not a blocked confusion matrix
        title_obj = cls.ax.set_title(fd.title,
                                     fontsize=fd.title_size / 3)
        from matplotlib import pyplot as plt  # 1 FULL SECOND IMPORT
        plt.setp(title_obj, color=text_color)
    if wolf:
        return rrr
def analyze_exp_group(eg: DNN_ExperimentGroup, cfg):
    """Compile the results of all experiments of a group into its compile folder.

    For every combination of training-set size and architecture the matching
    experiments are collected into result objects (the final training MCC
    table, the per-size MCC line plot and - unless ``cfg.salience`` - the
    validation confusion matrix). The collected data is averaged and the
    figures are saved to ``eg.compile_exp_res_folder``.

    :param eg: the experiment group; reads ``metadata``, ``folder``,
        ``compile_folder`` and ``compile_exp_res_folder``.
    :param cfg: configuration; reads ``salience`` and is passed to
        ``should_run`` of every analysis.

    NOTE(review): nesting reconstructed from a flattened source; confirm the
    loop levels, in particular of the final saving loop.
    """
    eg.compile_folder.deleteIfExists()
    eg.metadata.copy_into(eg.compile_folder)

    ARCH_LABELS = listmap(__['label'], eg.metadata['archs'])
    NTRAINS = eg.metadata['ntrainims']
    NEPOCHS = eg.metadata['nepochs']

    [
        a.during_compile(eg) for a in ANALYSES(mode=AnalysisMode.PIPELINE)
        if a.should_run(cfg)
    ]

    experiments = experiments_from_folder(eg.folder)
    # Any experiment will do: it is only used to probe which outputs exist
    # and as the source of log.pkl.
    random_exp = experiments[0]

    TrainTable = FinalResult(2,
                             'test/Matthews_Correlation_Coefficient.mfig',
                             data_exists=random_exp.folder[f'test'].exists,
                             is_table=True,
                             rows=ARCH_LABELS,
                             cols=NTRAINS)

    random_exp.folder['log.pkl'].copy_into(eg.compile_folder)

    def maybe_avg_result(pre, nepochs, is_table=False, dims=2, suf=None):
        # AverageResult for '<pre>/CM<nepochs>.mfig', or '<pre>/<suf>' if given.
        return AverageResult(
            dims,
            f'{pre}/CM{nepochs}.mfig' if suf is None else f'{pre}/{suf}',
            data_exists=random_exp.folder[pre].exists,
            is_table=is_table)

    results_to_compile = []
    for ni, ntrain in enum(NTRAINS):
        # One MCC line-plot result per training-set size, shared by all archs.
        MCC = maybe_avg_result('test',
                               None,
                               dims=1,
                               suf='Matthews_Correlation_Coefficient.mfig')
        for ai, arch in enum(ARCH_LABELS):
            if cfg.salience:
                results_to_compile = [TrainTable]
            else:
                results_to_compile = [TrainTable, MCC]
            if random_exp.folder['L2-Output'].exists:
                # NOTE(review): these results are created but not kept.
                maybe_avg_result(f'L2-Output', NEPOCHS)
                maybe_avg_result(f'L2-Inter', NEPOCHS)
                maybe_avg_result(f'L2-Raw', NEPOCHS)
            if not cfg.salience:
                results_to_compile.append(
                    maybe_avg_result(f'val', NEPOCHS, is_table=True))
            results_to_compile = [
                r for r in results_to_compile if r is not None
            ]
            # Collect the data of every experiment of this arch/ntrain pair.
            for exp in experiments.filtered(
                    lambda e: e.arch == arch and e.ntrain == ntrain,
            ):
                for res in results_to_compile:
                    try:
                        res.append(res.exp_data(exp), (ai, ni, 0),
                                   is_GNET=arch == 'GNET')
                    except:
                        # Debugging aid: any failure drops into the debugger.
                        breakpoint()

            for res in results_to_compile:
                if not res.data_exists:
                    continue
                if res.j is None:
                    log('about to breakpoint')
                    breakpoint()
                else:
                    log('res.j is not none, so no breakpoint')
                for vis in res.j.viss:
                    vis.make = True
                if res.dims == 1:
                    # Line plot: append the averaged line of this arch as a
                    # new visual and reset the collected data.
                    LINE_INDEX = -1
                    avg = np.mean(res.data, axis=0).tolist()
                    res.data = arr2d()
                    res.j.viss.append(copy.deepcopy(res.template))
                    res.j.viss[LINE_INDEX].make = True
                    res.j.viss[LINE_INDEX].y = avg
                    res.j.viss[LINE_INDEX].item_colors = CONTRAST_COLORS[ai]
                    for v in itr(res.j.viss):
                        res.j.viss[v].title = f'MCC(nTrainIms={ntrain})'
                    res.j.viss[LINE_INDEX].y_label = arch
                elif res.dims == 2:
                    if isinstance(res, AverageResult):
                        avg = np.mean(res.data, axis=2)
                    else:
                        avg = res.data[:, :, 0]
                    res.j.viss[0].confuse_max = flatmax(avg)
                    # res.j.viss[0].title_size = 30
                    if res.is_table:
                        # Put the row and column headers around the values.
                        avg = np.concatenate((res.row_headers[1:], avg),
                                             axis=1)
                        avg = np.concatenate((res.col_headers, avg), axis=0)
                    if isinstance(res, AverageResult):
                        res.j.viss[0].data = avg.tolist()
                        # Insert the training-set size before the first space
                        # of the title.
                        res.j.viss[0].title = res.j.viss[0].title.replace(
                            " ", f'{ntrain} ', 1)
                        eg.compile_exp_res_folder[
                            f'{arch}_{ntrain}__{res.suffix.replace("/", "_")}'].save(
                            res.j)
                    elif ai == len(ARCH_LABELS) - 1 and ni == len(NTRAINS) - 1:
                        # The final table is only written once, on the very
                        # last arch/ntrain combination.
                        res.j.viss = [
                            ConfusionMatrix(data=avg.tolist(),
                                            title="Final Training MCCs",
                                            confuse_min=0,
                                            confuse_max=1,
                                            headers_included=True,
                                            make=True,
                                            side_header_label='Architecture',
                                            top_header_label='#Train Images')
                        ]
                        eg.compile_exp_res_folder[
                            f'Final_Train_MCC.mfig'] = res.j
        # Save the line plots of this training-set size.
        for res in [
            mcc for mcc in results_to_compile
            if mcc.dims == 1 and mcc.data_exists
        ]:
            eg.compile_exp_res_folder[StringExtension(res.suffix[1:]).r({
                "test/": "",
                ".": f"_{ntrain}."
            })].save(res.j)
def unique_pairs(aaa, pair_to_self=False):
    """Yield every unordered pair of elements of ``aaa`` exactly once.

    Pairs are formed by position, not by value, so equal elements at
    different positions are still paired with each other. Pairs come out
    ordered by the position of their first element, then by the position of
    their second element.

    Args:
        aaa: any iterable. It is consumed once and snapshotted, so one-shot
            iterators and generators are handled correctly.
        pair_to_self: when truthy, each element is also paired with itself.

    Yields:
        ``(a, b)`` tuples where ``a`` appears no later than ``b`` in ``aaa``.
    """
    # Snapshot once: the input is walked in nested fashion, which would give
    # wrong pairs if a one-shot iterator were iterated directly.
    items = list(aaa)
    # An element's partners start at itself or at its right-hand neighbour.
    offset = 0 if pair_to_self else 1
    for position, first in enumerate(items):
        # Only visit the tail; scanning the lower half would yield nothing.
        for second in items[position + offset:]:
            yield first, second
def _log_plot(log_data, savetofile, checkpoint_lines: List[str], pipeline_sections):
    """Plot a table of checkpoint log times and a Sankey diagram of pipeline sections.

    Two artifacts are produced:
      * a matplotlib table of the times at which checkpoint lines were logged,
        saved to ``savetofile.abspath`` (only when at least one checkpoint
        line was found);
      * a plotly Sankey diagram of the nested pipeline sections, written as
        HTML next to ``savetofile`` with a ``_sankey`` suffix.

    Args:
        log_data: sequence of 3-item records; the second item is searched for
            checkpoint substrings and the third is used as the time. The
            third item of the last record is used as the total time.
        savetofile: output file; ``.abspath`` is read and the object is also
            wrapped in ``File(...)`` to derive the HTML path.
        checkpoint_lines: substrings that mark a log line as important.
        pipeline_sections: section records iterated through ``listitems``
            (presumably label -> dict pairs; confirm). Each record is indexed
            with ``'start'``, ``'end'``, ``'thread'``, ``'pid'`` and
            ``'process'``.

    NOTE(review): block nesting below was restored by hand; the places where
    more than one nesting is plausible are marked.
    """
    import matplotlib.pyplot as plt  # 1 SECOND IMPORT

    # Collect (checkpoint text, time) for every log line containing a
    # checkpoint substring; a line matching several checkpoints is recorded
    # once per match.
    important_text = []
    important_time = []
    for lin, file_line, t in log_data:
        for cl in checkpoint_lines:
            if cl in file_line:
                important_text.append(cl)
                important_time.append(t)

    @dataclass
    class LoggedPipelineSection:
        """One pipeline section plus the layout values computed for the Sankey plot."""
        start: float
        end: float
        thread: str
        process: str
        pid: str
        label: str
        # subsections: Optional[List] = None # does do anything yet
        index: int = -1  # MUST SET LATER
        source: Optional[int] = None  # might set later
        sourceSec: Optional = None
        x: int = 0  # set later
        time_amount: Optional[float] = None
        time_rel: Optional[float] = None
        time_amount_rel: Optional[float] = None
        y_center: Optional[float] = None
        color: str = 'orange'

    # Only sections with both a truthy start and a truthy end are plotted.
    loggedSections = []
    for sec, v in listitems(pipeline_sections):
        if v['start'] and v['end']:
            loggedSections.append(LoggedPipelineSection(
                start=v['start'],
                end=v['end'],
                label=sec,
                thread=v['thread'],
                pid=v['pid'],
                process=v['process']
            ))

    total = log_data[-1][2]
    important_text = [shorten_str(s, 20) for s in important_text]

    # ---- matplotlib table of important log times ----
    fig, axs = plt.subplots(nrows=1)
    table_ax = axs
    table_ax.set_axis_off()
    important_time = [round(t, 2) for t in important_time]
    if important_time:
        table = table_ax.table(
            cellText=[[str(t)] for t in important_time],
            rowLabels=important_text,
            colLabels=['time'],
            rowColours=["palegreen"] * (len(important_text) + 1),
            colColours=["palegreen"] * 2,
            colWidths=[0.5, 0.5],
            cellLoc='center',
            loc='center'
        )
        table_ax.set_title('Important Logs', fontweight="bold")

    # Per-checkpoint durations and relative positions. These lists are
    # filled here but not read again in this function.
    time_amounts = []
    time_rels = []
    time_amount_rels = []
    y_centers = []
    last = 0
    for t in important_time:
        time_amounts.append(t - last)
        time_rels.append(t / total)
        time_amount_rels.append(time_amounts[-1] / total)
        y_centers.append(time_rels[-1] - (time_amount_rels[-1] / 2))
        last = t
    sizes = important_time

    # ---- layout of pipeline sections ----
    # The first section's duration is the denominator for all relative
    # values. NOTE(review): raises IndexError when no section qualified.
    loggedSectionsTotal = loggedSections[0].end - loggedSections[0].start
    for i, sec in enum(loggedSections):
        sec.time_amount = sec.end - sec.start
        # no need for time_rel?
        sec.time_amount_rel = sec.time_amount / loggedSectionsTotal
        # Midpoint of the section, relative to the first section's span.
        sec.y_center = (((sec.end - (sec.time_amount / 2)) - loggedSections[0].start) / loggedSectionsTotal)
        sec.index = i
    loggedSections[0].y_center = 0.5

    # Find each section's parent: among sections that start earlier and end
    # later, pick the one that starts latest.
    for sec in loggedSections:
        candidates = []
        for secsec in loggedSections:
            if sec.start > secsec.start:
                candidates.append(secsec)
        candidates2 = []
        for cand in candidates:
            if sec.end < cand.end:
                candidates2.append(cand)
            # NOTE(review): this reads `secsec` (left over from the loop
            # above), not `cand`; harmless today because the branch is a
            # no-op.
            elif sec.start > secsec.end:
                pass
            # OVERLAP!
            # assert sec.start > secsec.end  # throws error if there is overlap but not nesting
        if candidates2:
            secsec = max(candidates2, key=lambda x: x.start)
            sec.source = secsec.index
            sec.sourceSec = secsec

    def count_recurse(sec):
        # Nesting depth: number of ancestors reachable through sourceSec.
        if sec.sourceSec:
            return 1 + count_recurse(sec.sourceSec)
        else:
            return 0

    for sec in loggedSections:
        sec.x = count_recurse(sec)

    # Colour list sized to the number of checkpoints; not read again in this
    # function.
    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
    while len(colors) < len(sizes):
        colors = colors + colors
    colors = colors[:len(sizes)]

    if important_text:
        plt.savefig(savetofile.abspath)
    # NOTE(review): clf() placed outside the `if`; confirm nesting.
    plt.clf()

    # Normalise depth to [0, 1] for the Sankey x positions.
    # NOTE(review): when no section is nested every x is 0, so maxX is 0 and
    # the division below raises ZeroDivisionError.
    maxX = max([sec.x for sec in loggedSections])
    xstep = normX = 1 / maxX
    for sec in loggedSections:
        sec.x = sec.x / maxX

    labels = [sec.label for sec in loggedSections]
    # Durations of the sections that have a parent, i.e. the Sankey links.
    values = [sec.time_amount for sec in loggedSections if sec.source is not None]
    if True:
        # Append each section's duration to its label. NOTE(review):
        # values[i - 1] lines up with labels[i] only if every section except
        # the first has a parent.
        for i in itr(labels):
            if i > 0:
                labels[i] = labels[i] + f' ({format_sec_dur(values[i - 1])})'
        labels[0] = labels[0] + f' ({format_sec_dur(loggedSections[0].time_amount)})'

    # Nudge apart sections that share an x position but come from a different
    # thread/process/pid, and colour the nudged one blue.
    # NOTE(review): nesting here is a guess; as laid out, the while body runs
    # once and stops after the first nudge.
    jitter_step = xstep / 10
    keepJittering = True
    while keepJittering:
        for sec, secsec in unique_pairs(loggedSections):
            if sec.x == secsec.x:
                if sec.thread != secsec.thread or sec.process != secsec.process or sec.pid != secsec.pid:
                    secsec.color = 'blue'
                    secsec.x += jitter_step
                    break
        keepJittering = False

    # ---- plotly Sankey diagram ----
    import plotly.graph_objects as go
    fig = go.Figure(data=[go.Sankey(
        # arrangement="fixed", # no cutoff, but overlap
        arrangement="snap",  # no overlap, but cutoff
        # arrangement = "perpendicular", # overlap and cutoff (less of both)
        # arrangement="freeform",# both overlap and cutoff
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=labels,
            y=[sec.y_center for sec in loggedSections],
            x=(arr([sec.x for sec in loggedSections]) * 1.0).tolist(),
            color=[sec.color for sec in loggedSections]
        ),
        link=dict(
            source=[sec.source for sec in loggedSections if sec.source is not None],
            # Targets are simply sections 1..n-1; this matches `source` and
            # `values` only if every section except the first has a parent.
            target=list(range(1, (len(loggedSections)))),
            value=values
        ))])
    fig.update_layout(
        font_size=20,
    )
    html = _get_fig(fig, full_html=True, include_plotlyjs=True)
    File(savetofile).res_pre_ext("_sankey").resrepext('html').write(html)
from lib.datamodel.Classification import Class, ClassSet
from mlib.boot.lang import enum

# Class labels used for RSA; each label's position in the list becomes the
# `index` of its Class.
RSA_CLASSES = ClassSet([
    Class(name=label, index=position)
    for position, label in enum([
        'NS0', 'NS2', 'NS4', 'NS6', 'NSd4',
        'S0', 'S2', 'S4', 'S6', 'Sd4',
    ])
])

# Layer name used for RSA, keyed by architecture label. The trailing numbers
# are carried over from the original annotations (presumably the flattened
# activation size of that layer; confirm).
RSA_LAYERS = dict(
    SQN='relu_conv10',  # 784
    AlexNet='fc7',  # 4096
    GoogleNet='inception_5b-output',  # 50176
    IRN='conv_7b_ac',  # 98304
    IV3='mixed10',  # 131072
    RN18='res5b-relu',  # 25088
    LSTM='final cell',
)