def genome_s3_autoload(s3_url):
    """Download a genome file from S3 and return its parsed JSON content.

    Args:
        s3_url: URL of the form ``s3://<bucket>/<object name>``.  The `ext`
            keyname attribute of the URL selects the loader: `.json` or
            `.json.gz`.

    Returns:
        The deserialized JSON document.

    Raises:
        ValueError: if the extension is not supported, or if a `.json.gz`
            payload cannot be decompressed, decoded and parsed.
    """
    s3 = boto3.client('s3')

    bucket = re.search('s3://(.+?)/', s3_url).group(1)
    # escape the bucket name so its characters are matched literally
    object_name = re.search(
        f's3://{re.escape(bucket)}/(.+)',
        s3_url,
    ).group(1)
    ext = kn.unpack(s3_url)['ext']

    with tempfile.NamedTemporaryFile(mode='w+b') as temp:
        print('temporary file', temp.name)
        s3.download_fileobj(bucket, object_name, temp)
        # the file is reopened by name below, so buffered bytes have to
        # reach the disk first
        temp.flush()

        if ext == '.json':
            with open(temp.name, 'r') as f:
                return json.load(f)

        if ext == '.json.gz':
            last_error = None
            for encoding in ('ascii', 'utf-8'):
                try:
                    with gzip.open(temp.name, 'rb') as f:
                        return json.loads(f.read().decode(encoding))
                except Exception as error:
                    # best effort: remember the failure, try next encoding
                    last_error = error
            raise ValueError(
                f'could not load gzipped json from {s3_url}'
            ) from last_error

    raise ValueError(f'unsupported extension {ext!r} for {s3_url}')
def RenderAndSave(upd, filename):
    """Render the cause-of-death map for one update and save it as a PNG.

    Non-live tiles are drawn black; live tiles are colored by their `Death`
    code.  A black grid is drawn over all tiles.  The output filename is
    built with `kn.pack` from attributes of `filename` plus file hashes.

    Args:
        upd: update number; selects the `upd_<upd>` datasets.
        filename: path to the HDF5 data file.  Its keyname attributes must
            include `seed`, `treat` and `_source_hash`.
    """
    upd_key = 'upd_' + str(upd)

    # np.array() copies the datasets into memory, so the file can be closed
    # as soon as they have been read
    with h5py.File(filename, 'r') as file:
        death = np.array(file['Death'][upd_key])
        live = np.array(file['Live'][upd_key])

    death_colors = {
        0: (1.0, 1.0, 1.0),  # alive
        1: (0.0, 1.0, 0.0),  # apoptosis
        2: (1.0, 0.0, 0.0),  # bankrupt
        3: (0.0, 0.0, 1.0),  # replaced
    }
    dead_color = (0.0, 0.0, 0.0)

    image = np.array([
        [
            death_colors[val_death] if val_live else dead_color
            for val_death, val_live in zip(row_death, row_live)
        ]
        for row_death, row_live in zip(death, live)
    ])

    plt.figure(figsize=(18, 18))
    plt.imshow(image, extent=(0, image.shape[1], image.shape[0], 0))
    plt.axis('off')
    # positional form: the keyword was renamed from `b` to `visible`
    plt.grid(None)

    # NOTE(review): x runs over shape[0] and y over shape[1]; this presumably
    # relies on the grid being square -- confirm
    lines = LineCollection(
        [
            ((x, y), dest)
            for x in range(image.shape[0])
            for y in range(image.shape[1])
            for dest in ((x + 1, y), (x, y + 1))
        ],
        linestyle='solid',
        colors='black',
    )
    plt.gca().add_collection(lines)

    name_attrs = kn.unpack(filename)
    plt.savefig(
        kn.pack({
            'title': 'death_viz',
            'update': str(upd),
            'seed': name_attrs['seed'],
            'treat': name_attrs['treat'],
            '_data_hathash_hash': fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash': fsh.FilesHash(
                file_parcel="full_parcel",
                files_join="cat_join",
            ).hash_files([sys.argv[0]]),
            '_source_hash': name_attrs['_source_hash'],
            'ext': '.png',
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0,
    )

    plt.clf()
    plt.close(plt.gcf())
def RenderAndSave(upd, filename):
    """Render live tiles with population/trigger borders and save a PNG.

    Live tiles are white and non-live tiles black.  A red segment is drawn
    on the top or left edge of a tile when the neighbor across that edge
    (indices wrap around) differs in its `Population` or `Triggers` value.

    Args:
        upd: update number; selects the `upd_<upd>` datasets.
        filename: path to the HDF5 data file.  Its keyname attributes must
            include `seed`, `treat` and `_source_hash`.
    """
    upd_key = 'upd_' + str(upd)

    # np.array() copies the datasets into memory, so the file can be closed
    # as soon as they have been read
    with h5py.File(filename, 'r') as file:
        live = np.array(file['Live'][upd_key])
        pop = np.array(file['Population'][upd_key])
        triggers = np.array(file['Triggers'][upd_key])

    image = np.array([
        [
            (1.0, 1.0, 1.0) if val_live else (0.0, 0.0, 0.0)
            for val_live in row_live
        ]
        for row_live in live
    ])

    plt.figure(figsize=(18, 18))
    plt.imshow(image, extent=(0, image.shape[1], image.shape[0], 0))
    plt.axis('off')
    # positional form: the keyword was renamed from `b` to `visible`
    plt.grid(None)

    def differs(data, x, y, dest):
        # compare tile (x, y) with the tile across the edge ending at dest
        return data[y][x] != data[dest[1] - 1][dest[0] - 1]

    # NOTE(review): x runs over shape[0] and y over shape[1]; this presumably
    # relies on the grid being square -- confirm
    lines = LineCollection(
        [
            ((x, y), dest)
            for x in range(image.shape[0])
            for y in range(image.shape[1])
            for dest in ((x + 1, y), (x, y + 1))
            if differs(pop, x, y, dest) or differs(triggers, x, y, dest)
        ],
        linestyle='solid',
        colors='red',
    )
    plt.gca().add_collection(lines)

    name_attrs = kn.unpack(filename)
    # NOTE(review): the title is 'death_viz' although no death data is
    # rendered here -- possibly copied from another script; confirm
    plt.savefig(
        kn.pack({
            'title': 'death_viz',
            'update': str(upd),
            'seed': name_attrs['seed'],
            'treat': name_attrs['treat'],
            '_data_hathash_hash': fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash': fsh.FilesHash(
                file_parcel="full_parcel",
                files_join="cat_join",
            ).hash_files([sys.argv[0]]),
            '_source_hash': name_attrs['_source_hash'],
            'ext': '.png',
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0,
    )

    plt.clf()
    plt.close(plt.gcf())
def get_critical_sites(variant_df, control_fits_df):
    """Find, per competition series, the genome sites flagged as critical.

    A variant is flagged when the cumulative probability of its fitness
    differential under the series' control t-distribution fit is below 1%.

    Args:
        variant_df: DataFrame with columns `Fitness Differential`,
            `Population Extinct`, `Competition Series` and
            `genome variation`.  Missing fitness differentials are filled
            with 0 in this frame.
        control_fits_df: DataFrame with exactly one row per `Series`,
            holding `Fit Degrees of Freedom`, `Fit Loc` and `Fit Scale`.

    Returns:
        dict mapping each competition series to the set of integer site
        indices parsed from the `i<N>` keys of the flagged variations.
    """
    # count competions where both strains went extinct simultaneously
    # as 0 Fitness Differential
    na_rows = variant_df['Fitness Differential'].isna()
    assert all(variant_df[na_rows]['Population Extinct'])
    # assign back rather than fillna(inplace=True) on a column selection,
    # which is not guaranteed to write through to variant_df
    variant_df['Fitness Differential'] = (
        variant_df['Fitness Differential'].fillna(0)
    )

    p_thresh = 1.0 / 100
    site_pattern = re.compile(r'^i(\d+)$')

    res = {}
    for series in variant_df['Competition Series'].unique():

        series_df = variant_df[variant_df['Competition Series'] == series]
        wt_vs_variant_df = series_df[
            series_df['genome variation'] != 'master'
        ].reset_index()

        h0_fit = ip.popsingleton(
            control_fits_df[control_fits_df['Series'] == series].to_dict(
                orient='records',
            )
        )

        # calculate the probability of observing fitness differential result
        # under control data distribution
        # (one vectorized call; it also handles an empty frame)
        wt_vs_variant_df['p'] = stats.t.cdf(
            wt_vs_variant_df['Fitness Differential'].to_numpy(dtype=float),
            h0_fit['Fit Degrees of Freedom'],
            loc=h0_fit['Fit Loc'],
            scale=h0_fit['Fit Scale'],
        )

        less_fit_variants = wt_vs_variant_df[wt_vs_variant_df['p'] < p_thresh]

        variation_attrs = [
            kn.unpack(kn.promote(variation), source_attr=False)
            for variation in less_fit_variants['genome variation']
        ]
        # every flagged variation must be a single Nop- substitution
        assert all(
            'Nop-' in ip.popsingleton(attrs.values())
            for attrs in variation_attrs
        )

        res[series] = {
            int(site_pattern.search(ip.popsingleton(attrs.keys())).group(1))
            for attrs in variation_attrs
        }

    return res
def genome_local_autoload(target):
    """Load a genome from a local `.json` or `.json.gz` file.

    Args:
        target: path to the file.  The `ext` keyname attribute of the path
            selects the loader.

    Returns:
        The deserialized JSON document.

    Raises:
        ValueError: if the extension is not supported, or if a `.json.gz`
            file cannot be read, decoded and parsed.
    """
    ext = kn.unpack(target)['ext']

    if ext == '.json':
        with open(target, 'r') as f:
            return json.load(f)

    if ext == '.json.gz':
        last_error = None
        for encoding in ('ascii', 'utf-8'):
            try:
                with gzip.open(target, 'rb') as f:
                    return json.loads(f.read().decode(encoding))
            except Exception as error:
                # best effort: remember the failure, try the next encoding
                last_error = error
        raise ValueError(
            f'could not load gzipped json from {target}'
        ) from last_error

    raise ValueError(f'unsupported extension {ext!r} for {target}')
def make_output_filename():
    """Build the output `.csv` keyname from the source filenames in argv.

    Every keyname attribute found in the source filenames, except `_` and
    `ext`, is summarized across sources with `ib.dub`.  A `concat`
    attribute holding the number of source files is added.
    """
    source_filenames = sys.argv[1:]
    attr_table = pd.DataFrame.from_records(
        list(map(kn.unpack, source_filenames)),
    )

    packed_attrs = {}
    for column in attr_table.columns:
        if column == '_' or column == 'ext':
            continue
        packed_attrs[column] = ib.dub(attr_table[column])

    packed_attrs['concat'] = str(len(sys.argv) - 1)
    packed_attrs['ext'] = '.csv'

    return kn.pack(packed_attrs)
import sys

import pandas as pd
from keyname import keyname as kn

# every command line argument is a csv file to consolidate
filenames = sys.argv[1:]

dataframes = []
for filename in filenames:
    df = pd.read_csv(filename)
    # tag each row with the keyname attributes of its source file
    attrs = kn.unpack(filename)
    for k in attrs:
        if k == '_' or k == 'ext':
            continue
        df[k] = attrs[k]
    dataframes.append(df)

res = pd.concat(dataframes, ignore_index=True)
res.to_csv("consolidated.csv", index=False)
import numpy as np
import sys
from tqdm import tqdm
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
from keyname import keyname as kn
from fileshash import fileshash as fsh
from joblib import delayed, Parallel

# command line: <update> <data file> [<data file> ...]
upd = int(sys.argv[1])
filenames = sys.argv[2:]

# check all data is from same software source
assert len({kn.unpack(filename)['_source_hash'] for filename in filenames}) == 1


def LoadRelevantData(filename):
    """Return the rows of csv `filename` whose `update` column equals `upd`.

    The file is read in chunks of 2048 rows and each chunk is filtered
    before the pieces are concatenated.
    """
    return pd.concat([
        chunk[chunk['update'] == upd]
        for chunk in pd.read_csv(filename, iterator=True, chunksize=2048)
    ])


def SafeLoadRelevantData(filename):
    """Load the rows for update `upd`, printing a warning on any failure.

    Returns the filtered DataFrame on success.  If loading raises, or no
    row matches `upd`, a warning is printed; as far as is visible here the
    function then falls through without an explicit return value.
    """
    try:
        res = LoadRelevantData(filename)
        # an empty result is treated like a corrupt file
        if not len(res):
            raise ValueError("update " + str(upd) + " not found")
        return res
    except Exception as e:
        # NOTE(review): `e` is not used in the visible code
        print("warning: corrupt or incomplete data file... skipping")
np.max(egv_cvals), "Eigenvector Centrality Variance": np.var(egv_cvals), "Maximum Load Centrality": np.max(load_cvals), "Median Load Centrality": np.median(load_cvals), "Load Centrality Variance": np.var(load_cvals), "Minimum Spanning Weight": nx.minimum_spanning_tree(G.to_undirected()).size(weight='weight') }) outfile = kn.pack({ 'title': kn.unpack(dataframe_filename)['title'] + "-stats", 'bitweight': kn.unpack(dataframe_filename)['bitweight'], 'seed': kn.unpack(dataframe_filename)['seed'], '_data_hathash_hash': fsh.FilesHash().hash_files([dataframe_filename]), '_script_fullcat_hash': fsh.FilesHash(file_parcel="full_parcel", files_join="cat_join").hash_files([sys.argv[0]]), # '_source_hash' :kn.unpack(dataframe_filename)['_source_hash'], 'ext': '.csv' }) pd.DataFrame.from_records(out).to_csv(outfile, index=False) print("output saved to", outfile)
matplotlib.rcParams['pdf.fonttype'] = 42

# command line: <key csv> <data csv> [<data csv> ...]
df_key = pd.read_csv(sys.argv[1])
dataframe_filenames = sys.argv[2:]

dfs = [(filename, pd.read_csv(filename)) for filename in dataframe_filenames]

print("Data loaded!")

res = []
for filename, df in dfs:

    # collapse to one mean row per genome position
    df = df.groupby(['Genome Position']).mean().reset_index()

    # tag rows with the keyname attributes of their source file
    for k, v in kn.unpack(filename).items():
        df[k] = v

    res.append(df)

df_data = pd.concat(res)

df_data['Slug'] = df_data['metric-slug']

# map each slug in the key file to the remaining columns of its row
# (Series.items() replaces Series.iteritems(), removed in pandas 2.0)
key = {
    row['Slug']: {
        col: val
        for col, val in row.items()
        if col != 'Slug'
    }
    for _, row in df_key.iterrows()
}
ax.set( ylim=(0, df[df["Cause"] == "Apoptosis"]["Per-Cell-Update Death Rate"].max() * 1.1)) plt.xticks(rotation=-90) outfile = kn.pack({ 'title': 'apoptosis', '_data_hathash_hash': fsh.FilesHash().hash_files([dataframe_filename]), '_script_fullcat_hash': fsh.FilesHash(file_parcel="full_parcel", files_join="cat_join").hash_files([sys.argv[0]]), '_source_hash': kn.unpack(dataframe_filename)['_source_hash'], 'ext': '.pdf' }) ax.get_figure().savefig(outfile, transparent=True, bbox_inches='tight', pad_inches=0) print('Output saved to', outfile) plt.clf() ax = sns.barplot( x="Treatment",
import h5py import sys import scipy.stats as stats from tqdm import tqdm import os import pandas as pd from keyname import keyname as kn from fileshash import fileshash as fsh from joblib import delayed, Parallel first_update = int(sys.argv[1]) last_update = int(sys.argv[2]) filenames = sys.argv[3:] # check all data is from same software source assert len({kn.unpack(filename)['_source_hash'] for filename in filenames}) == 1 def CalcSurroundedRate(filename): file = h5py.File(filename, 'r') nlev = int(file.attrs.get('NLEV')) return np.mean([ sum(ro[idx] != -1 for ro in ros) for ch, pc, dirs, live, ros in [( np.array(file['Channel']['lev_' + str(nlev - 1)]['upd_' + str(upd)]).flatten(), np.array(file['PrevChan']['upd_' + str(upd)]).flatten(), [ np.array(file['Index'][dir_key]).flatten() for dir_key in file['RepOutgoing'] ], np.array(file['Live']['upd_' + str(upd)]).flatten(), [
import os from tqdm import tqdm import pandas as pd from keyname import keyname as kn from fileshash import fileshash as fsh import re from collections import Counter, defaultdict from joblib import delayed, Parallel import json num_files = int(sys.argv[1]) filenames = sys.argv[2:num_files+2] updates = [int(v) for v in sys.argv[num_files+2:]] # check all data is from same software source assert len({kn.unpack(filename)['_source_hash'] for filename in filenames}) == 1 def GroupShape(filename): file = h5py.File(filename, 'r') indices = { idx : i for i, idx in enumerate(np.array(file['Index']['own']).flatten()) } neighs = [ np.array(file['Index']['dir_'+str(dir)]).flatten() for dir in range(4) ] nlev = int(file.attrs.get('NLEV')) res = defaultdict(lambda: [0 for __ in range(5)])
def RenderAndSave(upd, filename):
    """Render per-direction regulator states for one update; save a PNG.

    For each highest-level channel group, the regulator states of its live
    cells (one record per cell and direction) are reduced with PCA to at
    most three components, rescaled to [0, 1] per component and used as
    the RGB color of the matching triangle of each tile.  Level-0 channel
    borders are drawn white and level-1 borders black.

    Args:
        upd: update number; selects the `upd_<upd>` datasets.
        filename: path to the HDF5 data file.  Its keyname attributes must
            include `seed`, `treat` and `_source_hash`.
    """
    upd_key = 'upd_' + str(upd)

    # np.array() copies the datasets into memory, so the file can be closed
    # as soon as they have been read
    with h5py.File(filename, 'r') as file:
        nlev = int(file.attrs.get('NLEV'))

        channel = np.array(
            file['Channel']['lev_' + str(nlev - 1)][upd_key]
        ).flatten()
        regulator = [
            np.array(
                file['Regulators']['dir_' + str(direction)][upd_key]
            ).flatten()
            for direction in range(4)
        ]
        decoder = np.array(
            file['Regulators']['decoder'][upd_key]
        ).flatten()
        live = np.array(file['Live'][upd_key])
        index = np.array(file['Index']['own'])
        data_0 = np.array(file['Channel']['lev_0'][upd_key])
        # with a single level, the level-1 borders repeat the level-0 ones
        data_1 = np.array(
            file['Channel']['lev_0' if nlev == 1 else 'lev_1'][upd_key]
        )

    live_flat = live.flatten()
    index_flat = index.flatten()

    # get unique group IDs
    ids = set(channel)

    # for each group, get all functions
    # (one {cell index: RGB} lookup per direction)
    cmapper = [{} for __ in range(4)]
    for group_id in ids:
        tags_to_regs = []
        idxs = []
        dirs = []
        for flat_idx, idx in enumerate(index_flat):
            if channel[flat_idx] != group_id:
                continue
            for direction in range(4):
                idxs.append(idx)
                dirs.append(direction)
                if live_flat[flat_idx]:
                    # the regulator dataset holds indices into decoder,
                    # whose entries are utf-8 encoded JSON documents
                    archive = json.loads(
                        decoder[
                            regulator[direction][flat_idx]
                        ].decode("utf-8")
                    )['value0']
                    tags = {
                        d['key']: d['value']['value0']['value0']
                        for d in archive['tags']
                    }
                    regulators = {
                        d['key']: d['value']
                        for d in archive['regulators']
                    }
                    tags_to_regs.append({
                        tags[uid]: regulators[uid]["state"]
                        for uid in archive['uids']
                    })

        df = pd.DataFrame.from_records(tags_to_regs).fillna(1)

        n = min(3, len(df.columns), len(df))
        if n:
            pca = PCA(n_components=n)
            with warnings.catch_warnings():
                # ignore sklearn and divide by zero warnings
                # (we handle them below)
                warnings.simplefilter("ignore")
                pc = pca.fit_transform(df.to_numpy())
                # np.ptp() instead of the ndarray.ptp() method, which was
                # removed in NumPy 2.0
                pc = (pc - pc.min(0)) / np.ptp(pc, axis=0)

            # NOTE(review): idxs/dirs hold an entry for every cell of the
            # group while pc only has rows for live cells; zip pairs them
            # positionally and stops at the shorter -- confirm that groups
            # never mix live and non-live cells
            for idx, direction, row in zip(idxs, dirs, pc):
                cmapper[direction][idx] = (
                    row[0] if row.size >= 1 and not np.isnan(row[0]) else 0.5,
                    row[1] if row.size >= 2 and not np.isnan(row[1]) else 0.5,
                    row[2] if row.size >= 3 and not np.isnan(row[2]) else 0.5,
                )
        else:
            for idx, direction in zip(idxs, dirs):
                cmapper[direction][idx] = (0.5, 0.5, 0.5)

    image = np.flip(np.rot90(np.transpose(np.block([
        [
            np.transpose(RenderTriangles(
                cmapper[0][val_index],
                cmapper[1][val_index],
                cmapper[2][val_index],
                cmapper[3][val_index],
                val_live
            ))
            for val_live, val_index in zip(row_live, row_index)
        ]
        for row_live, row_index in zip(live, index)
    ])), k=1), axis=0)

    plt.figure(figsize=(18, 18))

    plt.imshow(
        image,
        extent=(0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    # positional form: the keyword was renamed from `b` to `visible`
    plt.grid(None)

    def border_segments(data):
        # segment on the top / left edge of a tile whose neighbor across
        # that edge (indices wrap around) holds a different value; the
        # coordinates are scaled by 42 to image pixels
        return [
            [[v * 42 for v in coord] for coord in ((x, y), dest)]
            for x in range(index.shape[0])
            for y in range(index.shape[1])
            for dest in ((x + 1, y), (x, y + 1))
            if data[y][x] != data[dest[1] - 1][dest[0] - 1]
        ]

    lines_0 = LineCollection(
        border_segments(data_0),
        linestyle='solid',
        colors='white',
        linewidths=(2,),
    )
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection(
        border_segments(data_1),
        linestyle='solid',
        colors='black',
        linewidths=(2,),
    )
    plt.gca().add_collection(lines_1)

    name_attrs = kn.unpack(filename)
    plt.savefig(
        kn.pack({
            'title': 'directional_regulator_viz',
            'update': str(upd),
            'seed': name_attrs['seed'],
            'treat': name_attrs['treat'],
            '_data_hathash_hash': fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash': fsh.FilesHash(
                file_parcel="full_parcel",
                files_join="cat_join"
            ).hash_files([sys.argv[0]]),
            '_source_hash': name_attrs['_source_hash'],
            'ext': '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())
prefix_url, regex = sys.argv[1:] except: print('bad arguments') print('USAGE: [prefix_url] [regex]') sys.exit(1) bucket = re.search('s3://(.+?)/', prefix_url).group(1) prefix = re.search(f's3://{bucket}/(.+)', prefix_url).group(1) print(f'prefix_url {prefix_url}') print(f'regex {regex}') print(f'bucket {bucket}') print(f'prefix {prefix}') assert prefix.count('stage=') == 1 and prefix.count('what=') == 1 assert any('stage' in kn.unpack(segment) and 'what' in kn.unpack(segment) for segment in prefix.split('/')) stages, = [ list(kn.unpack(segment)['stage'].split('~')) for segment in prefix.split('/') if 'stage' in kn.unpack(segment) and 'what' in kn.unpack(segment) ] print(f'stages {stages}') prefixes = [ '/'.join( kn.pack({ **kn.unpack(segment), **{
prefix_url, regex = sys.argv[1:] except: print(__doc__) sys.exit(1) bucket = re.search('s3://(.+?)/', prefix_url).group(1) prefix = re.search(f's3://{bucket}/(.+)', prefix_url).group(1) print(f'prefix_url {prefix_url}') print(f'regex {regex}') print(f'bucket {bucket}') print(f'prefix {prefix}') assert prefix.count('stage=') == 1 and prefix.count('what=') == 1 assert any( 'stage' in kn.unpack(segment) and 'what' in kn.unpack(segment) for segment in prefix.split('/') ) stages, = [ list(kn.unpack(segment)['stage'].split('~')) for segment in prefix.split('/') if 'stage' in kn.unpack(segment) and 'what' in kn.unpack(segment) ] print(f'stages {stages}') prefixes = [ '/'.join( kn.pack({ **kn.unpack(segment), **{ 'stage' : stage, },
# bars appear in alphabetical metric order
metric_order = sorted(['Hamming', 'Hash', 'Integer', 'Streak', 'Integer (bi)'])
g = sns.barplot(
    data=df_data,
    x='Metric',
    y='Match Distance',
    order=metric_order,
)

g.set(ylim=(0, 1))
g.set_xticklabels(g.get_xticklabels(), rotation=90)

plt.gcf().set_size_inches(3.75, 2.75)

# output name: attributes of the input file plus data and script hashes
source_attrs = kn.unpack(dataframe_filename)
script_hasher = fsh.FilesHash(file_parcel="full_parcel", files_join="cat_join")
outfile = kn.pack({
    'title': 'dimensionality_barplot',
    'bitweight': source_attrs['bitweight'],
    'seed': source_attrs['seed'],
    '_data_hathash_hash': fsh.FilesHash().hash_files([dataframe_filename]),
    '_script_fullcat_hash': script_hasher.hash_files([sys.argv[0]]),
    # '_source_hash' :kn.unpack(dataframe_filename)['_source_hash'],
    'ext': '.pdf'
})

plt.savefig(
    outfile,
    transparent=True,
    bbox_inches='tight',
    pad_inches=0,
)

print("output saved to", outfile)

plt.clf()
])['Elapsed Generations', ].diff(periods=-1) df['Generations Per Update'] = df['Elapsed Generations Delta'] / df['Update'] ################################################################################ print() print('calculating upload path') print('---------------------------------------------------------------------') ################################################################################ # common_keys = set.intersection(*[ # set( kn.unpack(source).keys() ) # for source in sources # ]) out_filename = kn.pack(kn.unpack(data_url.key), ) out_prefix = f'endeavor={endeavor}/thread-profiles/stage=8+what=elaborated/' out_path = out_prefix + out_filename print(f'upload path will be s3://{bucket}/{out_path}') ################################################################################ print() print('dumping and uploading') print('---------------------------------------------------------------------') ################################################################################ # have to work with filename or pandas compression doesn't work with tempfile.TemporaryDirectory() as tmp:
def load_json(filename): with open(filename) as json_file: data = json.load(json_file) return data res = defaultdict(list) for filename, entry in [(filename, load_json(filename)) for filename in sys.argv[1:]]: for benchmark in entry['benchmarks']: res[frozendict({ 'run_type': benchmark['run_type'], })].append({ 'Library': kn.unpack(filename)['library'], 'Implementation': 'vanilla' if 'SignalGP' in filename else 'lite', 'Statistic': (benchmark['aggregate_name'] if 'aggregate_name' in benchmark else 'measurement'), 'Wall Nanoseconds': benchmark['real_time'], 'CPU Nanoseconds': benchmark['cpu_time'], 'num agents': benchmark['num agents'], }) for run_specs, rows in res.items(): pd.DataFrame(rows).to_csv( kn.pack({
def RenderAndSave(upd, filename):
    """Render parent/child relations between neighbors; save a PNG.

    Each tile gets one value per direction: `P_CHILD` when the cell's
    previous channel equals the neighbor's highest-level channel,
    `P_PARENT` when the neighbor's previous channel equals the cell's
    highest-level channel, otherwise a gray level derived from the cell's
    own channel ID.  Level-0 channel borders are drawn dotted gray and
    level-1 borders solid black.

    Args:
        upd: update number; selects the `upd_<upd>` datasets.
        filename: path to the HDF5 data file.  Its keyname attributes must
            include `seed`, `treat` and `_source_hash`.
    """
    upd_key = 'upd_' + str(upd)

    # np.array() copies the datasets into memory, so the file can be closed
    # as soon as they have been read
    with h5py.File(filename, 'r') as file:
        nlev = int(file.attrs.get('NLEV'))

        own = np.array(file['Index']['own'])
        # flat index of the neighbor in each direction, per cell
        dirs = {
            'top': np.array(file['Index']['dir_0']).flatten(),
            'bottom': np.array(file['Index']['dir_1']).flatten(),
            'left': np.array(file['Index']['dir_3']).flatten(),
            'right': np.array(file['Index']['dir_2']).flatten(),
        }
        # highest-level channel IDs
        top_chan = np.array(
            file['Channel']['lev_' + str(nlev - 1)][upd_key]
        ).flatten()
        pvch = np.array(file['PrevChan'][upd_key]).flatten()
        live = np.array(file['Live'][upd_key])
        data_0 = np.array(file['Channel']['lev_0'][upd_key])
        # with a single level, the level-1 borders repeat the level-0 ones
        data_1 = np.array(
            file['Channel']['lev_0' if nlev == 1 else 'lev_1'][upd_key]
        )

    res = defaultdict(dict)
    for idx, own_idx in enumerate(own.flatten()):
        for side, neighbor_of in dirs.items():
            neighbor = neighbor_of[idx]
            if pvch[idx] == top_chan[neighbor]:
                kind = P_CHILD
            elif pvch[neighbor] == top_chan[idx]:
                kind = P_PARENT
            else:
                # grayscale channel ID
                kind = (top_chan[idx] / 2**64) * 0.8
            res[own_idx][side] = kind

    image = np.flip(np.rot90(np.transpose(np.block([
        [
            np.transpose(RenderTriangles(
                res[val_own]['top'],
                res[val_own]['bottom'],
                res[val_own]['right'],
                res[val_own]['left'],
                val_live
            ))
            for val_own, val_live in zip(row_own, row_live)
        ]
        for row_own, row_live in zip(own, live)
    ])), k=1), axis=0)

    plt.figure(figsize=(18, 18))

    plt.imshow(image, extent=(0, image.shape[1], image.shape[0], 0))

    plt.axis('off')
    # positional form: the keyword was renamed from `b` to `visible`
    plt.grid(None)

    def border_segments(data):
        # segment on the top / left edge of a tile whose neighbor across
        # that edge (indices wrap around) holds a different value; the
        # coordinates are scaled by 42 to image pixels
        return [
            [[v * 42 for v in coord] for coord in ((x, y), dest)]
            for x in range(data.shape[0])
            for y in range(data.shape[1])
            for dest in ((x + 1, y), (x, y + 1))
            if data[y][x] != data[dest[1] - 1][dest[0] - 1]
        ]

    lines_0 = LineCollection(
        border_segments(data_0),
        linestyle=(0, (1, 3)),
        colors='0.5',
    )
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection(
        border_segments(data_1),
        linestyle='solid',
        colors='black',
    )
    plt.gca().add_collection(lines_1)

    name_attrs = kn.unpack(filename)
    plt.savefig(
        kn.pack({
            'title': 'directional_propagule_viz',
            'update': str(upd),
            'seed': name_attrs['seed'],
            'treat': name_attrs['treat'],
            '_data_hathash_hash': fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash': fsh.FilesHash(
                file_parcel="full_parcel",
                files_join="cat_join",
            ).hash_files([sys.argv[0]]),
            '_source_hash': name_attrs['_source_hash'],
            'ext': '.png',
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0,
    )

    plt.clf()
    plt.close(plt.gcf())
return np.mean([ 1 if apop[idx] == 2 else 0 for apop in [np.array(file['Apoptosis']['upd_' + str(upd)]).flatten()] for idx in range(file['Index']['own'].size) ]) def ExtractUpdates(file): return [int(re.findall('\d+', key)[0]) for key in file['Apoptosis'].keys()] outfile = kn.pack({ 'title': 'apoptosis_series', 'seed': kn.unpack(filename)['seed'], 'treat': kn.unpack(filename)['treat'], '_data_fullcat_hash': fsh.FilesHash(file_parcel="full_parcel", files_join="cat_join").hash_files([filename]), '_script_fullcat_hash': fsh.FilesHash(file_parcel="full_parcel", files_join="cat_join").hash_files([sys.argv[0]]), '_source_hash': kn.unpack(filename)['_source_hash'], 'ext': '.csv' }) pd.DataFrame.from_dict([{
def RenderAndSave(upd, filename):
    """Render each tile's resource level for one update and save a PNG.

    The color of a live tile is chosen from the sum of its `Stockpile` and
    `TotalContribute` values; non-live tiles are black.  Level-0 channel
    borders are drawn dotted gray and level-1 borders solid black.

    Args:
        upd: update number; selects the `upd_<upd>` datasets.
        filename: path to the HDF5 data file.  Its keyname attributes must
            include `seed`, `treat` and `_source_hash`.
    """
    upd_key = 'upd_' + str(upd)

    # np.array() copies the datasets into memory, so the file can be closed
    # as soon as they have been read
    with h5py.File(filename, 'r') as file:
        nlev = int(file.attrs.get('NLEV'))

        # display current stockpile AND inbound resource
        stock = np.array(file['Stockpile'][upd_key])
        share = np.array(file['TotalContribute'][upd_key])
        live = np.array(file['Live'][upd_key])
        data_0 = np.array(file['Channel']['lev_0'][upd_key])
        # with a single level, the level-1 borders repeat the level-0 ones
        data_1 = np.array(
            file['Channel']['lev_0' if nlev == 1 else 'lev_1'][upd_key]
        )

    def tile_color(val_stock, val_share, val_live):
        # pick the RGB color of one tile
        if not val_live:
            # dead
            return (0.0, 0.0, 0.0)
        total = val_stock + val_share
        if total > 1.0:
            # enough resource to reproduce (green to yellow)
            return (min(1.0, total - 1.0), 1.0, 0.0)
        if total > 0.0:
            # not yet enough resource to reproduce (blue)
            return (1.0 - total, 1.0 - total, 1.0)
        # negative resource (red)
        fade = max(0.0, 1.0 + total / 1.25)
        return (1.0, fade, fade)

    image = np.array([
        [
            tile_color(val_stock, val_share, val_live)
            for val_stock, val_share, val_live
            in zip(row_stock, row_share, row_live)
        ]
        for row_stock, row_share, row_live in zip(stock, share, live)
    ])

    plt.figure(figsize=(18, 18))

    plt.imshow(
        image,
        extent=(0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    # positional form: the keyword was renamed from `b` to `visible`
    plt.grid(None)

    def border_segments(data):
        # segment on the top / left edge of a tile whose neighbor across
        # that edge (indices wrap around) holds a different value
        # NOTE(review): x runs over shape[0] and y over shape[1]; this
        # presumably relies on the grid being square -- confirm
        return [
            ((x, y), dest)
            for x in range(image.shape[0])
            for y in range(image.shape[1])
            for dest in ((x + 1, y), (x, y + 1))
            if data[y][x] != data[dest[1] - 1][dest[0] - 1]
        ]

    lines_0 = LineCollection(
        border_segments(data_0),
        linestyle=(0, (1, 3)),
        colors='0.5',
    )
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection(
        border_segments(data_1),
        linestyle='solid',
        colors='black',
    )
    plt.gca().add_collection(lines_1)

    name_attrs = kn.unpack(filename)
    plt.savefig(
        kn.pack({
            'title': 'stockpile_viz',
            'update': str(upd),
            'seed': name_attrs['seed'],
            'treat': name_attrs['treat'],
            '_data_hathash_hash': fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash': fsh.FilesHash(
                file_parcel="full_parcel",
                files_join="cat_join"
            ).hash_files([sys.argv[0]]),
            '_source_hash': name_attrs['_source_hash'],
            'ext': '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())
def RenderAndSave(upd, filename):
    """Render channel IDs for one update as colors and save a PNG.

    A tile whose level-0 and level-1 channel IDs are both nonzero is
    colored in HSV space: the hue comes from the level-1 ID and the
    saturation from the level-0 ID.  All other tiles are black.  Level-0
    borders are drawn white and level-1 borders black.

    Args:
        upd: update number; selects the `upd_<upd>` datasets.
        filename: path to the HDF5 data file.  Its keyname attributes must
            include `seed`, `treat` and `_source_hash`.
    """
    upd_key = 'upd_' + str(upd)

    # np.array() copies the datasets into memory, so the file can be closed
    # as soon as they have been read
    with h5py.File(filename, 'r') as file:
        nlev = int(file.attrs.get('NLEV'))
        data_0 = np.array(file['Channel']['lev_0'][upd_key])
        # with a single level, level 1 repeats level 0
        data_1 = np.array(
            file['Channel']['lev_0' if nlev == 1 else 'lev_1'][upd_key]
        )

    image = np.array([
        [
            tuple(colorsys.hsv_to_rgb(
                (val_1 / 2**63) % 1.0,
                (val_0 / 2**63) % 0.6 + 0.4,
                1.0
            )) if val_0 and val_1 else (0, 0, 0)
            for val_0, val_1 in zip(row_0, row_1)
        ]
        for row_0, row_1 in zip(data_0, data_1)
    ])

    plt.figure(figsize=(18, 18))

    plt.imshow(
        image,
        extent=(0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    # positional form: the keyword was renamed from `b` to `visible`
    plt.grid(None)

    def border_segments(data):
        # segment on the top / left edge of a tile whose neighbor across
        # that edge (indices wrap around) holds a different value
        # NOTE(review): x runs over shape[0] and y over shape[1]; this
        # presumably relies on the grid being square -- confirm
        return [
            ((x, y), dest)
            for x in range(image.shape[0])
            for y in range(image.shape[1])
            for dest in ((x + 1, y), (x, y + 1))
            if data[y][x] != data[dest[1] - 1][dest[0] - 1]
        ]

    lines_0 = LineCollection(
        border_segments(data_0),
        linestyle='solid',
        colors='white',
    )
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection(
        border_segments(data_1),
        linestyle='solid',
        colors='black',
    )
    plt.gca().add_collection(lines_1)

    name_attrs = kn.unpack(filename)
    plt.savefig(
        kn.pack({
            'title': 'channel_viz',
            'update': str(upd),
            'seed': name_attrs['seed'],
            'treat': name_attrs['treat'],
            '_data_hathash_hash': fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash': fsh.FilesHash(
                file_parcel="full_parcel",
                files_join="cat_join"
            ).hash_files([sys.argv[0]]),
            '_source_hash': name_attrs['_source_hash'],
            'ext': '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())
import h5py import sys from tqdm import tqdm import seaborn as sns import pandas as pd import matplotlib.pyplot as plt import os from keyname import keyname as kn from fileshash import fileshash as fsh from joblib import delayed, Parallel from natsort import natsorted filenames = sys.argv[1:] # check all data is from same software source assert len({kn.unpack(filename)['_source_hash'] for filename in filenames}) == 1 def CalcCountShare(filename): file = h5py.File(filename, 'r') zips = zip(*( zip( np.array(file['Live'][upd_key]).flatten(), (sum(z) for z in zip(*( np.array( file['ResourceContributed']['dir_'+str(dir)][upd_key] ).flatten() for dir in range(5) ))), ) for upd_key in natsorted([key for key in file['Live']])[-16:] )) proportion = sum([
def RenderAndSave(upd, filename):
    """Render regulator states with a pre-fitted PCA per group; save a PNG.

    For each highest-level channel group, the regulator values of every
    live cell are summed over the four directions and projected with the
    entry stored for that group in the module-level `pcamapper`
    (`(pca, cols, minv, ptpv)` or `None`).  The rescaled components give
    the tile's RGB color; groups without a projection are gray and
    non-live tiles black.  Level-0 borders are white, level-1 black.

    Args:
        upd: update number; selects the `upd_<upd>` datasets.
        filename: path to the HDF5 data file.  Its keyname attributes must
            include `seed`, `treat` and `_source_hash`.
    """
    upd_key = 'upd_' + str(upd)

    # np.array() copies the datasets into memory, so the file can be closed
    # as soon as they have been read
    with h5py.File(filename, 'r') as file:
        nlev = int(file.attrs.get('NLEV'))

        channel = np.array(
            file['Channel']['lev_' + str(nlev - 1)][upd_key]
        ).flatten()
        regulators = [
            np.array(
                file['Regulators']['dir_' + str(direction)][upd_key]
            ).flatten()
            for direction in range(4)
        ]
        live = np.array(file['Live'][upd_key])
        index = np.array(file['Index']['own'])
        data_0 = np.array(file['Channel']['lev_0'][upd_key])
        # with a single level, the level-1 borders repeat the level-0 ones
        data_1 = np.array(
            file['Channel']['lev_0' if nlev == 1 else 'lev_1'][upd_key]
        )

    live_flat = live.flatten()
    index_flat = index.flatten()

    # get unique group IDs
    ids = set(channel)

    # for each group, get all regulators
    cmapper = {}
    for group_id in ids:
        tags_to_regs = []
        idxs = []
        for flat_idx, idx in enumerate(index_flat):
            if channel[flat_idx] != group_id:
                continue
            idxs.append(idx)
            if live_flat[flat_idx]:
                # one utf-8 encoded JSON document per direction
                archives = [
                    json.loads(
                        regulator[flat_idx].decode("utf-8")
                    )['value0']
                    for regulator in regulators
                ]
                tags = {
                    d['key']: d['value']['value0']['value0']
                    for d in archives[0]['tags']
                }
                # sum each regulator over the four directions
                regulatorsum = defaultdict(lambda: 0.0)
                for archive in archives:
                    for d in archive['regulators']:
                        regulatorsum[d['key']] += d['value']
                tags_to_regs.append({
                    tags[uid]: regulatorsum[uid]
                    for uid in archives[0]['uids']
                })

        df = pd.DataFrame.from_records(tags_to_regs).fillna(1)

        if pcamapper[group_id] is not None:
            pca, cols, minv, ptpv = pcamapper[group_id]
            pc = pca.transform(df[cols].to_numpy())
            pc = (pc - minv) / ptpv

            # NOTE(review): idxs holds every cell of the group while pc
            # only has rows for live cells; zip pairs them positionally --
            # confirm that groups never mix live and non-live cells
            for idx, row in zip(idxs, pc):
                cmapper[idx] = (
                    row[0] if row.size >= 1 and not np.isnan(row[0]) else 0.5,
                    row[1] if row.size >= 2 and not np.isnan(row[1]) else 0.5,
                    row[2] if row.size >= 3 and not np.isnan(row[2]) else 0.5,
                )
        else:
            for idx in idxs:
                cmapper[idx] = (0.5, 0.5, 0.5)

    image = np.array([
        [
            cmapper[val_index] if val_live else (0.0, 0.0, 0.0)
            for val_index, val_live in zip(row_index, row_live)
        ]
        for row_index, row_live in zip(index, live)
    ])

    plt.figure(figsize=(18, 18))

    plt.imshow(
        image,
        extent=(0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    # positional form: the keyword was renamed from `b` to `visible`
    plt.grid(None)

    def border_segments(data):
        # segment on the top / left edge of a tile whose neighbor across
        # that edge (indices wrap around) holds a different value
        # NOTE(review): x runs over shape[0] and y over shape[1]; this
        # presumably relies on the grid being square -- confirm
        return [
            ((x, y), dest)
            for x in range(image.shape[0])
            for y in range(image.shape[1])
            for dest in ((x + 1, y), (x, y + 1))
            if data[y][x] != data[dest[1] - 1][dest[0] - 1]
        ]

    lines_0 = LineCollection(
        border_segments(data_0),
        linestyle='solid',
        colors='white',
    )
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection(
        border_segments(data_1),
        linestyle='solid',
        colors='black',
    )
    plt.gca().add_collection(lines_1)

    name_attrs = kn.unpack(filename)
    plt.savefig(
        kn.pack({
            'title': 'consistent_regulator_viz',
            'update': str(upd),
            'seed': name_attrs['seed'],
            'treat': name_attrs['treat'],
            '_data_hathash_hash': fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash': fsh.FilesHash(
                file_parcel="full_parcel",
                files_join="cat_join"
            ).hash_files([sys.argv[0]]),
            '_source_hash': name_attrs['_source_hash'],
            'ext': '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())
import numpy as np
import random
import sys

from dishpylib.pyloaders import genome_local_autoload

random.seed(1)

# command line: <less ops genome> <more ops genome> <num interpolation steps>
try:
    __, less_ops, more_ops, num_interpolation_steps = sys.argv
    num_interpolation_steps = int(num_interpolation_steps)
except ValueError:
    # wrong number of arguments, or a step count that is not an integer
    print(__doc__)
    sys.exit(1)

# both paths must carry an extension for genome_local_autoload
assert 'ext' in kn.unpack(less_ops)
assert 'ext' in kn.unpack(more_ops)

less_ops_data = genome_local_autoload(less_ops)
more_ops_data = genome_local_autoload(more_ops)

less_ops_program = less_ops_data['value0']['program']
more_ops_program = more_ops_data['value0']['program']

# both genomes must have the same number of instructions
less_ops_num_insts = len(less_ops_program)
more_ops_num_insts = len(more_ops_program)
assert less_ops_num_insts == more_ops_num_insts

# count instructions whose operation is not a Nop- variant
less_ops_num_ops = sum(
    'Nop-' not in inst['operation'] for inst in less_ops_program
)
more_ops_num_ops = sum(
    'Nop-' not in inst['operation'] for inst in more_ops_program
)
import numpy as np import h5py import sys from tqdm import tqdm import seaborn as sns import pandas as pd import matplotlib.pyplot as plt import os from keyname import keyname as kn from fileshash import fileshash as fsh from pathlib import Path filenames = sys.argv[1:] # check all data is from same software source assert len({kn.unpack(filename)['_source_hash'] for filename in filenames}) == 1 dfs = [] for filename in tqdm(filenames): df = pd.read_csv(filename) df["seed"] = int(kn.unpack(Path(filename).parts[-2])["seed"]) df["step"] = int(kn.unpack(Path(filename).parts[-2])["step"]) dfs.append(df) df = pd.concat(dfs) df = df[df["step"] < 1050] outfile = kn.pack({
} for upds in upd_groups for lupds in (list(upds), ) for obs, upd in enumerate(lupds) for idx, row in raw[raw['Update'] == upd].iterrows()]) ax = sns.lineplot(x="Update", y="Per-Cell-Update Apoptosis Rate", hue="Type", data=proc) ax.set_ylim(ymin=0.0, ymax=0.0003) outfile = kn.pack({ 'title': 'apoptosis_series', 'treat': kn.unpack(dataframe_filename)['treat'], 'seed': kn.unpack(dataframe_filename)['seed'], '_data_hathash_hash': fsh.FilesHash().hash_files([dataframe_filename]), '_script_fullcat_hash': fsh.FilesHash(file_parcel="full_parcel", files_join="cat_join").hash_files([sys.argv[0]]), '_source_hash': kn.unpack(dataframe_filename)['_source_hash'], 'ext': '.pdf' }) ax.get_figure().savefig(outfile, transparent=True,
import numpy as np
import h5py
import sys
from tqdm import tqdm
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
from keyname import keyname as kn
from fileshash import fileshash as fsh
from pathlib import Path

# every command line argument is a csv data file
filenames = sys.argv[1:]

# check all data is from same software source
assert len({kn.unpack(filename)['_source_hash'] for filename in filenames}) == 1

dfs = []
for filename in tqdm(filenames):
    # adapted from https://stackoverflow.com/a/48135340
    try:
        df = pd.read_csv(filename)
    except pd.errors.EmptyDataError:
        # pd.errors is the public home of this exception
        print(filename, " is empty and has been skipped.")
        continue

    # seed and step are encoded in the name of the containing directory
    dir_attrs = kn.unpack(Path(filename).parts[-2])
    df["seed"] = int(dir_attrs["seed"])
    df["step"] = int(dir_attrs["step"])
    dfs.append(df)

df = pd.concat(dfs)
def RenderAndSave(upd, filename):
    """Render per-direction inbox traffic for one update and save a PNG.

    The four `InboxTraffic` values of each tile are handed to
    `RenderTriangles` together with the tile's `Live` value.  Level-0
    channel borders are drawn dotted gray and level-1 borders solid black.

    Args:
        upd: update number; selects the `upd_<upd>` datasets.
        filename: path to the HDF5 data file.  Its keyname attributes must
            include `seed`, `treat` and `_source_hash`.
    """
    upd_key = 'upd_' + str(upd)

    # np.array() copies the datasets into memory, so the file can be closed
    # as soon as they have been read
    with h5py.File(filename, 'r') as file:
        nlev = int(file.attrs.get('NLEV'))

        traffic = file['InboxTraffic']
        top = np.array(traffic['dir_0'][upd_key])
        bottom = np.array(traffic['dir_1'][upd_key])
        left = np.array(traffic['dir_3'][upd_key])
        right = np.array(traffic['dir_2'][upd_key])
        live = np.array(file['Live'][upd_key])

        data_0 = np.array(file['Channel']['lev_0'][upd_key])
        # with a single level, the level-1 borders repeat the level-0 ones
        data_1 = np.array(
            file['Channel']['lev_0' if nlev == 1 else 'lev_1'][upd_key]
        )

    image = np.flip(np.rot90(np.transpose(np.block([
        [
            # RenderTriangles takes right before left
            np.transpose(RenderTriangles(
                val_top,
                val_bottom,
                val_right,
                val_left,
                val_live
            ))
            for val_top, val_bottom, val_left, val_right, val_live in zip(
                row_top, row_bottom, row_left, row_right, row_live
            )
        ]
        for row_top, row_bottom, row_left, row_right, row_live in zip(
            top, bottom, left, right, live
        )
    ])), k=1), axis=0)

    plt.figure(figsize=(18, 18))

    plt.imshow(
        image,
        extent=(0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    # positional form: the keyword was renamed from `b` to `visible`
    plt.grid(None)

    def border_segments(data):
        # segment on the top / left edge of a tile whose neighbor across
        # that edge (indices wrap around) holds a different value; the
        # coordinates are scaled by 42 to image pixels
        return [
            [[v * 42 for v in coord] for coord in ((x, y), dest)]
            for x in range(data.shape[0])
            for y in range(data.shape[1])
            for dest in ((x + 1, y), (x, y + 1))
            if data[y][x] != data[dest[1] - 1][dest[0] - 1]
        ]

    lines_0 = LineCollection(
        border_segments(data_0),
        linestyle=(0, (1, 3)),
        colors='0.5',
    )
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection(
        border_segments(data_1),
        linestyle='solid',
        colors='black',
    )
    plt.gca().add_collection(lines_1)

    name_attrs = kn.unpack(filename)
    plt.savefig(
        kn.pack({
            'title': 'directional_messaging_viz',
            'update': str(upd),
            'seed': name_attrs['seed'],
            'treat': name_attrs['treat'],
            '_data_hathash_hash': fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash': fsh.FilesHash(
                file_parcel="full_parcel",
                files_join="cat_join"
            ).hash_files([sys.argv[0]]),
            '_source_hash': name_attrs['_source_hash'],
            'ext': '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())