def genome_s3_autoload(s3_url):

    s3 = boto3.client('s3')

    bucket = re.search('s3://(.+?)/', s3_url).group(1)
    object_name = re.search(f's3://{bucket}/(.+)', s3_url).group(1)

    with tempfile.NamedTemporaryFile(mode='w+b') as temp:
        print('temporary file', temp.name)

        s3.download_fileobj(bucket, object_name, temp)

        if kn.unpack(s3_url)['ext'] == '.json':
            with open(temp.name, 'r') as f:
                return json.load(f)

        elif kn.unpack(s3_url)['ext'] == '.json.gz':
            try:
                with gzip.open(temp.name, 'rb') as f:
                    return json.loads(f.read().decode('ascii'))
            except Exception:
                pass

            try:
                with gzip.open(temp.name, 'rb') as f:
                    return json.loads(f.read().decode('utf-8'))
            except Exception:
                pass

        raise ValueError(f"unrecognized extension on {s3_url}")
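
# Hedged usage sketch for genome_s3_autoload; the bucket and the
# keyname-formatted object name are hypothetical, and the call assumes
# AWS credentials with read access to the object.
genome = genome_s3_autoload(
    's3://my-bucket/genomes/title=genome+ext=.json.gz')
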
def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    death = np.array(file['Death']['upd_' + str(upd)])
    live = np.array(file['Live']['upd_' + str(upd)])

    image = np.array([
        [
            # dead
            (0.0, 0.0, 0.0) if not val_live else {
                0: (1.0, 1.0, 1.0),  # alive
                1: (0.0, 1.0, 0.0),  # apoptosis
                2: (1.0, 0.0, 0.0),  # bankrupt
                3: (0.0, 0.0, 1.0),  # replaced
            }[val_death] for val_death, val_live in zip(row_death, row_live)
        ] for row_death, row_live in zip(death, live)
    ])

    plt.figure(figsize=(18, 18))

    plt.imshow(image, extent=(0, image.shape[1], image.shape[0], 0))

    plt.axis('off')
    plt.grid(b=None)

    lines = LineCollection([((x, y), dest) for x in range(image.shape[0])
                            for y in range(image.shape[1])
                            for dest in ((x + 1, y), (x, y + 1))],
                           linestyle='solid',
                           colors='black')
    plt.gca().add_collection(lines)

    plt.savefig(kn.pack({
        'title': 'death_viz',
        'update': str(upd),
        'seed': kn.unpack(filename)['seed'],
        'treat': kn.unpack(filename)['treat'],
        '_data_hathash_hash': fsh.FilesHash().hash_files([filename]),
        '_script_fullcat_hash': fsh.FilesHash(
            file_parcel="full_parcel",
            files_join="cat_join").hash_files([sys.argv[0]]),
        '_source_hash': kn.unpack(filename)['_source_hash'],
        'ext': '.png',
    }),
                transparent=True,
                bbox_inches='tight',
                pad_inches=0)

    plt.clf()
    plt.close(plt.gcf())
def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    live = np.array(file['Live']['upd_' + str(upd)])
    pop = np.array(file['Population']['upd_' + str(upd)])
    triggers = np.array(file['Triggers']['upd_' + str(upd)])

    image = np.array([[(0.0, 0.0, 0.0) if not val_live else (1.0, 1.0, 1.0)
                       for val_live in row_live] for row_live in live])

    plt.figure(figsize=(18, 18))

    plt.imshow(image, extent=(0, image.shape[1], image.shape[0], 0))

    plt.axis('off')
    plt.grid(b=None)

    lines = LineCollection(
        [((x, y), dest) for x in range(image.shape[0])
         for y in range(image.shape[1]) for dest in ((x + 1, y), (x, y + 1))
         if (pop[y][x] != pop[dest[1] - 1][dest[0] - 1]
             or triggers[y][x] != triggers[dest[1] - 1][dest[0] - 1])],
        linestyle='solid',
        colors='red')
    plt.gca().add_collection(lines)

    plt.savefig(kn.pack({
        'title': 'death_viz',
        'update': str(upd),
        'seed': kn.unpack(filename)['seed'],
        'treat': kn.unpack(filename)['treat'],
        '_data_hathash_hash': fsh.FilesHash().hash_files([filename]),
        '_script_fullcat_hash': fsh.FilesHash(
            file_parcel="full_parcel",
            files_join="cat_join").hash_files([sys.argv[0]]),
        '_source_hash': kn.unpack(filename)['_source_hash'],
        'ext': '.png',
    }),
                transparent=True,
                bbox_inches='tight',
                pad_inches=0)

    plt.clf()
    plt.close(plt.gcf())
Example #4
def get_critical_sites(variant_df, control_fits_df):

    # count competitions where both strains went extinct simultaneously
    # as 0 Fitness Differential
    na_rows = variant_df['Fitness Differential'].isna()
    assert all(variant_df[na_rows]['Population Extinct'])
    variant_df['Fitness Differential'].fillna(
        0,
        inplace=True,
    )

    res = {}
    for series in variant_df['Competition Series'].unique():

        series_df = variant_df[variant_df['Competition Series'] == series]

        wt_vs_variant_df = series_df[
            series_df['genome variation'] != 'master'].reset_index()

        h0_fit = ip.popsingleton(
            control_fits_df[control_fits_df['Series'] == series].to_dict(
                orient='records', ))

        # calculate the probability of observing fitness differential result
        # under control data distribution
        if len(wt_vs_variant_df):
            wt_vs_variant_df['p'] = wt_vs_variant_df.apply(
                lambda row: stats.t.cdf(
                    row['Fitness Differential'],
                    h0_fit['Fit Degrees of Freedom'],
                    loc=h0_fit['Fit Loc'],
                    scale=h0_fit['Fit Scale'],
                ),
                axis=1,
            )
        else:
            # special case for an empty dataframe
            # to prevent an exception
            wt_vs_variant_df['p'] = []

        p_thresh = 1.0 / 100
        less_fit_variants = wt_vs_variant_df[wt_vs_variant_df['p'] < p_thresh]

        variation_attrs = [
            kn.unpack(kn.promote(variation), source_attr=False)
            for variation in less_fit_variants['genome variation']
        ]

        assert all('Nop-' in ip.popsingleton(attrs.values())
                   for attrs in variation_attrs)

        critical_idxs = [
            int(re.search(r'^i(\d+)$', ip.popsingleton(attrs.keys())).group(1))
            for attrs in variation_attrs
        ]

        res[series] = set(critical_idxs)

    return res
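
# Toy illustration (not from the original listing) of the thresholding
# inside get_critical_sites: a strongly negative fitness differential has
# a small left-tail probability under the control t distribution, so the
# variant would be flagged. The fit parameters here are made up.
example_p = stats.t.cdf(-4.2, 9, loc=0.0, scale=1.0)
assert example_p < 1.0 / 100
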
def genome_local_autoload(target):
    if kn.unpack(target)['ext'] == '.json':
        with open(target, 'r') as f:
            return json.load(f)

    elif kn.unpack(target)['ext'] == '.json.gz':
        try:
            with gzip.open(target, 'rb') as f:
                return json.loads(f.read().decode('ascii'))
        except Exception:
            pass

        try:
            with gzip.open(target, 'rb') as f:
                return json.loads(f.read().decode('utf-8'))
        except Exception:
            pass

    raise ValueError(f"unrecognized extension on {target}")
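
# The two autoloaders above duplicate the ascii-then-utf-8 gzip fallback;
# a possible shared helper (a sketch, not part of the original listing):
def load_gzipped_json(path):
    for encoding in ('ascii', 'utf-8'):
        try:
            with gzip.open(path, 'rb') as f:
                return json.loads(f.read().decode(encoding))
        except Exception:
            pass
    raise ValueError(f"could not decode gzipped json at {path}")
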
Example #6
def make_output_filename():

    df = pd.DataFrame.from_records(
        [kn.unpack(source_filename) for source_filename in sys.argv[1:]], )

    out_attrs = {
        column: ib.dub(df[column])
        for column in df.columns if column not in {
            '_',
            'ext',
        }
    }

    out_filename = kn.pack({
        **out_attrs,
        **{
            'concat': str(len(sys.argv) - 1),
            'ext': '.csv',
        },
    })

    return out_filename
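
# Hedged illustration of the keyname round trip make_output_filename
# relies on; the attribute values here are hypothetical.
packed = kn.pack({'concat': '3', 'ext': '.csv'})
assert kn.unpack(packed)['concat'] == '3'
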
Example #7
from keyname import keyname as kn
import sys
import pandas as pd

filenames = sys.argv[1:]

dataframes = []

for filename in filenames:
    df = pd.read_csv(filename)

    for k, v in kn.unpack(filename).items():
        if k not in ['_', 'ext']:
            df[k] = v

    dataframes.append(df)

res = pd.concat(dataframes, ignore_index=True)

res.to_csv("consolidated.csv", index=False)
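
# Hedged usage sketch for the consolidation script above; the script and
# input filenames are hypothetical. Invoked as, e.g.:
#   python consolidate.py seed=1+ext=.csv seed=2+ext=.csv
# each file's keyname attributes (here, seed) become columns in
# consolidated.csv.
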
import numpy as np
import sys
from tqdm import tqdm
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
from keyname import keyname as kn
from fileshash import fileshash as fsh
from joblib import delayed, Parallel

upd = int(sys.argv[1])
filenames = sys.argv[2:]

# check all data is from same software source
assert len({kn.unpack(filename)['_source_hash'] for filename in filenames}) == 1

def LoadRelevantData(filename):
    return pd.concat([
        chunk[chunk['update'] == upd]
        for chunk in pd.read_csv(filename, iterator=True, chunksize=2048)
    ])

def SafeLoadRelevantData(filename):
    try:
        res = LoadRelevantData(filename)
        if not len(res):
            raise ValueError("update " + str(upd) + " not found")
        return res
    except Exception:
        print("warning: corrupt or incomplete data file... skipping", filename)
        # signal the skip explicitly so callers can filter out None results
        return None
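
# Hedged sketch (not in the truncated original) of how the joblib imports
# above are presumably used: fan SafeLoadRelevantData out across the
# input files, then drop the files that were skipped.
dfs = Parallel(n_jobs=-1)(
    delayed(SafeLoadRelevantData)(filename) for filename in filenames)
df = pd.concat([d for d in dfs if d is not None], ignore_index=True)
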
Example #9
        np.max(egv_cvals),
        "Eigenvector Centrality Variance": np.var(egv_cvals),
        "Maximum Load Centrality": np.max(load_cvals),
        "Median Load Centrality": np.median(load_cvals),
        "Load Centrality Variance": np.var(load_cvals),
        "Minimum Spanning Weight":
        nx.minimum_spanning_tree(G.to_undirected()).size(weight='weight'),
    })

outfile = kn.pack({
    'title': kn.unpack(dataframe_filename)['title'] + "-stats",
    'bitweight': kn.unpack(dataframe_filename)['bitweight'],
    'seed': kn.unpack(dataframe_filename)['seed'],
    '_data_hathash_hash': fsh.FilesHash().hash_files([dataframe_filename]),
    '_script_fullcat_hash': fsh.FilesHash(
        file_parcel="full_parcel",
        files_join="cat_join").hash_files([sys.argv[0]]),
    # '_source_hash': kn.unpack(dataframe_filename)['_source_hash'],
    'ext': '.csv',
})
pd.DataFrame.from_records(out).to_csv(outfile, index=False)
print("output saved to", outfile)
matplotlib.rcParams['pdf.fonttype'] = 42

df_key = pd.read_csv(sys.argv[1])

dataframe_filenames = sys.argv[2:]

dfs = [(filename, pd.read_csv(filename)) for filename in dataframe_filenames]

print("Data loaded!")

res = []
for filename, df in dfs:

    df = df.groupby(['Genome Position']).mean().reset_index()

    for k, v in kn.unpack(filename).items():
        df[k] = v

    res.append(df)

df_data = pd.concat(res)

df_data['Slug'] = df_data['metric-slug']

key = {
    row['Slug']: {col: val for col, val in row.items() if col != 'Slug'}
    for idx, row in df_key.iterrows()
}
Example #11
ax.set(
    ylim=(0,
          df[df["Cause"] == "Apoptosis"]["Per-Cell-Update Death Rate"].max() *
          1.1))
plt.xticks(rotation=-90)

outfile = kn.pack({
    'title': 'apoptosis',
    '_data_hathash_hash': fsh.FilesHash().hash_files([dataframe_filename]),
    '_script_fullcat_hash': fsh.FilesHash(
        file_parcel="full_parcel",
        files_join="cat_join").hash_files([sys.argv[0]]),
    '_source_hash': kn.unpack(dataframe_filename)['_source_hash'],
    'ext': '.pdf',
})

ax.get_figure().savefig(outfile,
                        transparent=True,
                        bbox_inches='tight',
                        pad_inches=0)

print('Output saved to', outfile)

plt.clf()

ax = sns.barplot(
    x="Treatment",
Example #12
import numpy as np
import h5py
import sys
import scipy.stats as stats
from tqdm import tqdm
import os
import pandas as pd
from keyname import keyname as kn
from fileshash import fileshash as fsh
from joblib import delayed, Parallel

first_update = int(sys.argv[1])
last_update = int(sys.argv[2])
filenames = sys.argv[3:]

# check all data is from same software source
assert len({kn.unpack(filename)['_source_hash']
            for filename in filenames}) == 1


def CalcSurroundedRate(filename):
    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))
    return np.mean([
        sum(ro[idx] != -1 for ro in ros) for ch, pc, dirs, live, ros in [(
            np.array(file['Channel']['lev_' +
                                     str(nlev - 1)]['upd_' +
                                                    str(upd)]).flatten(),
            np.array(file['PrevChan']['upd_' + str(upd)]).flatten(), [
                np.array(file['Index'][dir_key]).flatten()
                for dir_key in file['RepOutgoing']
            ], np.array(file['Live']['upd_' + str(upd)]).flatten(), [
Example #13
import os
import sys
import h5py
import numpy as np
from tqdm import tqdm
import pandas as pd
from keyname import keyname as kn
from fileshash import fileshash as fsh
import re
from collections import Counter, defaultdict
from joblib import delayed, Parallel
import json

num_files = int(sys.argv[1])
filenames = sys.argv[2:num_files+2]
updates = [int(v) for v in sys.argv[num_files+2:]]
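# e.g. `script.py 2 a.h5 b.h5 100 200` yields num_files == 2,
# filenames == ['a.h5', 'b.h5'], and updates == [100, 200]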

# check all data is from same software source
assert len({kn.unpack(filename)['_source_hash'] for filename in filenames}) == 1

def GroupShape(filename):

    file = h5py.File(filename, 'r')
    indices = {
        idx : i
        for i, idx in enumerate(np.array(file['Index']['own']).flatten())
    }
    neighs = [
        np.array(file['Index']['dir_'+str(dir)]).flatten()
        for dir in range(4)
    ]
    nlev = int(file.attrs.get('NLEV'))

    res = defaultdict(lambda: [0 for __ in range(5)])
def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    channel = np.array(
        file['Channel']['lev_'+str(nlev-1)]['upd_'+str(upd)]
    ).flatten()
    regulator = [
        np.array(
            file['Regulators']['dir_'+str(dir)]['upd_'+str(upd)]
        ).flatten()
        for dir in range(4)
    ]
    decoder = np.array(
        file['Regulators']['decoder']['upd_'+str(upd)]
    ).flatten()
    live = np.array(file['Live']['upd_'+str(upd)])
    index = np.array(file['Index']['own'])

    data_0 = np.array(file['Channel']['lev_0']['upd_'+str(upd)])
    data_1 = (
        np.array(file['Channel']['lev_0']['upd_'+str(upd)])
        if nlev == 1 else
        np.array(file['Channel']['lev_1']['upd_'+str(upd)])
    )

    # get unique group IDs
    ids = set(channel.flatten())

    # for each group, get all functions
    cmapper = [ {} for dir in range(4) ]
    for id in ids:
        tags_to_regs = []
        idxs = []
        dirs = []
        for flat_idx, idx in enumerate(index.flatten()):
            if channel[flat_idx] == id:
                for dir in range(4):
                    idxs.append( idx )
                    dirs.append( dir )
                    if live.flatten()[flat_idx]:
                        archive = json.loads(
                            decoder[regulator[dir][flat_idx]].decode("utf-8")
                        )['value0']
                        tags = {
                            d['key'] : d['value']['value0']['value0']
                            for d in archive['tags']
                        }
                        regulators = {
                            d['key'] : d['value']
                            for d in archive['regulators']
                        }
                        tags_to_regs.append({
                            tags[uid] : regulators[uid]["state"] for uid in archive['uids']
                        })

        df = pd.DataFrame.from_records(tags_to_regs).fillna(1)

        n=min(3, len(df.columns), len(df))
        if n:
            pca = PCA(n_components=n)

            pc = None
            with warnings.catch_warnings():
                # ignore sklearn and divide by zero warnings
                # (we handle them below)
                warnings.simplefilter("ignore")
                pc = pca.fit_transform(df.to_numpy())
                pc = (pc - pc.min(0)) / pc.ptp(0)

            for idx, dir, row in zip(idxs, dirs, pc):
                cmapper[dir][idx] = (
                    row[0] if row.size >= 1 and not np.isnan(row[0]) else 0.5,
                    row[1] if row.size >= 2 and not np.isnan(row[1]) else 0.5,
                    row[2] if row.size >= 3 and not np.isnan(row[2]) else 0.5,
                )
        else:
            for idx, dir in zip(idxs, dirs):
                cmapper[dir][idx] = (0.5, 0.5, 0.5)

    image = np.flip(np.rot90(np.transpose(np.block([
        [
            np.transpose(RenderTriangles(
                cmapper[0][val_index],
                cmapper[1][val_index],
                cmapper[2][val_index],
                cmapper[3][val_index],
                val_live
            )) for val_live, val_index in zip(row_live, row_index)
        ]
        for row_live, row_index
        in zip(live, index)
    ])),k=1),axis=0)

    plt.figure(figsize=(18,18))

    plt.imshow(
        image,
        extent = (0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    plt.grid(b=None)

    rescale = lambda coord: [v * 42 for v in coord]
    lines_0 = LineCollection([
        [ rescale(coord) for coord in ((x,y), dest) ]
        for x in range(index.shape[0])
        for y in range(index.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_0[y][x] != data_0[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='white', linewidths=(2,))
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection([
        [ rescale(coord) for coord in ((x,y), dest) ]
        for x in range(index.shape[0])
        for y in range(index.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_1[y][x] != data_1[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='black', linewidths=(2,))
    plt.gca().add_collection(lines_1)

    plt.savefig(
        kn.pack({
            'title' : 'directional_regulator_viz',
            'update' : str(upd),
            'seed' : kn.unpack(filename)['seed'],
            'treat' : kn.unpack(filename)['treat'],
            '_data_hathash_hash' : fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash' : fsh.FilesHash(
                                                file_parcel="full_parcel",
                                                files_join="cat_join"
                                            ).hash_files([sys.argv[0]]),
            '_source_hash' :kn.unpack(filename)['_source_hash'],
            'ext' : '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())
Example #15
try:
    prefix_url, regex = sys.argv[1:]
except ValueError:
    print('bad arguments')
    print('USAGE: [prefix_url] [regex]')
    sys.exit(1)

bucket = re.search('s3://(.+?)/', prefix_url).group(1)
prefix = re.search(f's3://{bucket}/(.+)', prefix_url).group(1)

print(f'prefix_url {prefix_url}')
print(f'regex {regex}')
print(f'bucket {bucket}')
print(f'prefix {prefix}')

assert prefix.count('stage=') == 1 and prefix.count('what=') == 1
assert any('stage' in kn.unpack(segment) and 'what' in kn.unpack(segment)
           for segment in prefix.split('/'))

stages, = [
    list(kn.unpack(segment)['stage'].split('~'))
    for segment in prefix.split('/')
    if 'stage' in kn.unpack(segment) and 'what' in kn.unpack(segment)
]

print(f'stages {stages}')
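
# Hypothetical illustration of the segment format asserted above: a
# segment like 'stage=2~3+what=collated' unpacks to stages ['2', '3'].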

prefixes = [
    '/'.join(
        kn.pack({
            **kn.unpack(segment),
            **{
Example #16
try:
    prefix_url, regex = sys.argv[1:]
except ValueError:
    print(__doc__)
    sys.exit(1)

bucket = re.search('s3://(.+?)/', prefix_url).group(1)
prefix = re.search(f's3://{bucket}/(.+)', prefix_url).group(1)

print(f'prefix_url {prefix_url}')
print(f'regex {regex}')
print(f'bucket {bucket}')
print(f'prefix {prefix}')

assert prefix.count('stage=') == 1 and prefix.count('what=') == 1
assert any(
    'stage' in kn.unpack(segment) and 'what' in kn.unpack(segment)
    for segment in prefix.split('/')
)

stages, = [
    list(kn.unpack(segment)['stage'].split('~'))
    for segment in prefix.split('/')
    if 'stage' in kn.unpack(segment) and 'what' in kn.unpack(segment)
]

print(f'stages {stages}')

prefixes = [ '/'.join(
    kn.pack({
        **kn.unpack(segment),
        **{ 'stage' : stage, },
Example #17
g = sns.barplot(
    data=df_data,
    x='Metric',
    y='Match Distance',
    order=sorted(['Hamming', 'Hash', 'Integer', 'Streak', 'Integer (bi)']),
)
g.set(ylim=(0, 1))
g.set_xticklabels(g.get_xticklabels(), rotation=90)

plt.gcf().set_size_inches(3.75, 2.75)

outfile = kn.pack({
    'title': 'dimensionality_barplot',
    'bitweight': kn.unpack(dataframe_filename)['bitweight'],
    'seed': kn.unpack(dataframe_filename)['seed'],
    '_data_hathash_hash': fsh.FilesHash().hash_files([dataframe_filename]),
    '_script_fullcat_hash': fsh.FilesHash(
        file_parcel="full_parcel",
        files_join="cat_join").hash_files([sys.argv[0]]),
    # '_source_hash': kn.unpack(dataframe_filename)['_source_hash'],
    'ext': '.pdf',
})
plt.savefig(outfile, transparent=True, bbox_inches='tight', pad_inches=0)
print("output saved to", outfile)

plt.clf()
Example #18
])['Elapsed Generations', ].diff(periods=-1)

df['Generations Per Update'] = df['Elapsed Generations Delta'] / df['Update']

################################################################################
print()
print('calculating upload path')
print('---------------------------------------------------------------------')
################################################################################

# common_keys = set.intersection(*[
#     set( kn.unpack(source).keys() )
#     for source in sources
# ])

out_filename = kn.pack(kn.unpack(data_url.key), )

out_prefix = f'endeavor={endeavor}/thread-profiles/stage=8+what=elaborated/'
out_path = out_prefix + out_filename

print(f'upload path will be s3://{bucket}/{out_path}')

################################################################################
print()
print('dumping and uploading')
print('---------------------------------------------------------------------')
################################################################################

# have to work with filename or pandas compression doesn't work
with tempfile.TemporaryDirectory() as tmp:
Example #19
def load_json(filename):
    with open(filename) as json_file:
        data = json.load(json_file)
    return data


res = defaultdict(list)

for filename, entry in [(filename, load_json(filename))
                        for filename in sys.argv[1:]]:
    for benchmark in entry['benchmarks']:
        res[frozendict({
            'run_type': benchmark['run_type'],
        })].append({
            'Library': kn.unpack(filename)['library'],
            'Implementation': 'vanilla' if 'SignalGP' in filename else 'lite',
            'Statistic': (benchmark['aggregate_name']
                          if 'aggregate_name' in benchmark else 'measurement'),
            'Wall Nanoseconds': benchmark['real_time'],
            'CPU Nanoseconds': benchmark['cpu_time'],
            'num agents': benchmark['num agents'],
        })

for run_specs, rows in res.items():
    pd.DataFrame(rows).to_csv(
        kn.pack({
Example #20
def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    own = np.array(file['Index']['own']).flatten()
    dirs = {
        'top': np.array(file['Index']['dir_0']).flatten(),
        'bottom': np.array(file['Index']['dir_1']).flatten(),
        'left': np.array(file['Index']['dir_3']).flatten(),
        'right': np.array(file['Index']['dir_2']).flatten(),
    }

    chans = [
        np.array(file['Channel']['lev_' + str(lev)]['upd_' +
                                                    str(upd)]).flatten()
        for lev in range(nlev)
    ]
    cage = np.array(file['CellAge']['upd_' + str(upd)]).flatten()
    pvch = np.array(file['PrevChan']['upd_' + str(upd)]).flatten()
    ppos = np.array(file['ParentPos']['upd_' + str(upd)]).flatten()

    live = np.array(file['Live']['upd_' + str(upd)])

    data_0 = np.array(file['Channel']['lev_0']['upd_' + str(upd)])
    data_1 = (np.array(file['Channel']['lev_0']['upd_' + str(upd)]) if nlev
              == 1 else np.array(file['Channel']['lev_1']['upd_' + str(upd)]))

    res = defaultdict(dict)
    for idx in range(own.size):
        for dir, drct in dirs.items():
            type = NONE
            if pvch[idx] == chans[-1][drct[idx]]:
                type = P_CHILD
            elif pvch[drct[idx]] == chans[-1][idx]:
                type = P_PARENT
            else:
                # grayscale channel ID
                type = (chans[-1][idx] / 2**64) * 0.8

            res[own[idx]][dir] = type

    own = np.array(file['Index']['own'])
    live = np.array(file['Live']['upd_' + str(upd)])

    image = np.flip(np.rot90(np.transpose(
        np.block([[
            np.transpose(
                RenderTriangles(res[val_own]['top'], res[val_own]['bottom'],
                                res[val_own]['right'], res[val_own]['left'],
                                val_live))
            for val_own, val_live in zip(row_own, row_live)
        ] for row_own, row_live in zip(own, live)])),
                             k=1),
                    axis=0)

    plt.figure(figsize=(18, 18))

    plt.imshow(image, extent=(0, image.shape[1], image.shape[0], 0))

    plt.axis('off')
    plt.grid(b=None)

    rescale = lambda coord: [v * 42 for v in coord]
    lines_0 = LineCollection(
        [[rescale(coord) for coord in ((x, y), dest)]
         for x in range(data_0.shape[0]) for y in range(data_0.shape[1])
         for dest in ((x + 1, y), (x, y + 1))
         if data_0[y][x] != data_0[dest[1] - 1][dest[0] - 1]],
        linestyle=(0, (1, 3)),
        colors='0.5')
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection(
        [[rescale(coord) for coord in ((x, y), dest)]
         for x in range(data_1.shape[0]) for y in range(data_1.shape[1])
         for dest in ((x + 1, y), (x, y + 1))
         if data_1[y][x] != data_1[dest[1] - 1][dest[0] - 1]],
        linestyle='solid',
        colors='black')
    plt.gca().add_collection(lines_1)

    plt.savefig(kn.pack({
        'title': 'directional_propagule_viz',
        'update': str(upd),
        'seed': kn.unpack(filename)['seed'],
        'treat': kn.unpack(filename)['treat'],
        '_data_hathash_hash': fsh.FilesHash().hash_files([filename]),
        '_script_fullcat_hash': fsh.FilesHash(
            file_parcel="full_parcel",
            files_join="cat_join").hash_files([sys.argv[0]]),
        '_source_hash': kn.unpack(filename)['_source_hash'],
        'ext': '.png',
    }),
                transparent=True,
                bbox_inches='tight',
                pad_inches=0)

    plt.clf()
    plt.close(plt.gcf())
Example #21
    return np.mean([
        1 if apop[idx] == 2 else 0
        for apop in [np.array(file['Apoptosis']['upd_' + str(upd)]).flatten()]
        for idx in range(file['Index']['own'].size)
    ])


def ExtractUpdates(file):
    return [int(re.findall(r'\d+', key)[0]) for key in file['Apoptosis'].keys()]


outfile = kn.pack({
    'title': 'apoptosis_series',
    'seed': kn.unpack(filename)['seed'],
    'treat': kn.unpack(filename)['treat'],
    '_data_fullcat_hash': fsh.FilesHash(
        file_parcel="full_parcel",
        files_join="cat_join").hash_files([filename]),
    '_script_fullcat_hash': fsh.FilesHash(
        file_parcel="full_parcel",
        files_join="cat_join").hash_files([sys.argv[0]]),
    '_source_hash': kn.unpack(filename)['_source_hash'],
    'ext': '.csv',
})

pd.DataFrame.from_dict([{
def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    # display current stockpile AND inbound resource
    stock = np.array(file['Stockpile']['upd_'+str(upd)])
    share = np.array(file['TotalContribute']['upd_'+str(upd)])
    live = np.array(file['Live']['upd_'+str(upd)])

    data_0 = np.array(file['Channel']['lev_0']['upd_'+str(upd)])
    data_1 = (
        np.array(file['Channel']['lev_0']['upd_'+str(upd)])
        if nlev == 1 else
        np.array(file['Channel']['lev_1']['upd_'+str(upd)])
    )

    image = np.array([
        [
            # dead
            (0.0, 0.0, 0.0)
            if not val_live else
            # enough resource to reproduce (green to yellow)
            (
                min(1.0, (val_stock + val_share) - 1.0),
                1.0,
                0.0
            ) if val_stock + val_share > 1.0 else
            # not yet enough resource to reproduce (blue)
            (
                1.0 - (val_stock + val_share),
                1.0 - (val_stock + val_share),
                1.0
            ) if val_stock + val_share > 0.0 else
            # negative resource (red)
            (
                1.0,
                max(0.0, 1.0 + (val_stock + val_share) / 1.25),
                max(0.0, 1.0 + (val_stock + val_share) / 1.25)
            )
            for val_stock, val_share, val_live in zip(row_stock, row_share, row_live)
        ]
    for row_stock, row_share, row_live in zip(stock, share, live)])

    plt.figure(figsize=(18,18))

    plt.imshow(
        image,
        extent = (0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    plt.grid(b=None)

    lines_0 = LineCollection([
        ((x,y), dest)
        for x in range(image.shape[0])
        for y in range(image.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_0[y][x] != data_0[dest[1]-1][dest[0]-1]
    ], linestyle=(0, (1, 3)), colors='0.5')
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection([
        ((x,y), dest)
        for x in range(image.shape[0])
        for y in range(image.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_1[y][x] != data_1[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='black')
    plt.gca().add_collection(lines_1)

    plt.savefig(
        kn.pack({
            'title' : 'stockpile_viz',
            'update' : str(upd),
            'seed' : kn.unpack(filename)['seed'],
            'treat' : kn.unpack(filename)['treat'],
            '_data_hathash_hash' : fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash' : fsh.FilesHash(
                                                file_parcel="full_parcel",
                                                files_join="cat_join"
                                            ).hash_files([sys.argv[0]]),
            '_source_hash' :kn.unpack(filename)['_source_hash'],
            'ext' : '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())
def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    data_0 = np.array(file['Channel']['lev_0']['upd_'+str(upd)])
    data_1 = (
        np.array(file['Channel']['lev_0']['upd_'+str(upd)])
        if nlev == 1 else
        np.array(file['Channel']['lev_1']['upd_'+str(upd)])
    )

    image = np.array([
        [
            tuple(colorsys.hsv_to_rgb(
                (val_1/2**63)%1.0,
                (val_0/2**63)%0.6 + 0.4,
                1.0
            ))
            if val_0 and val_1 else (0,0,0)
        for val_0, val_1 in zip(row_0, row_1)
        ]
    for row_0, row_1 in zip(data_0, data_1)])

    plt.figure(figsize=(18,18))

    plt.imshow(
        image,
        extent = (0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    plt.grid(b=None)

    lines_0 = LineCollection([
        ((x,y), dest)
        for x in range(image.shape[0])
        for y in range(image.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_0[y][x] != data_0[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='white')
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection([
        ((x,y), dest)
        for x in range(image.shape[0])
        for y in range(image.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_1[y][x] != data_1[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='black')
    plt.gca().add_collection(lines_1)

    plt.savefig(
        kn.pack({
            'title' : 'channel_viz',
            'update' : str(upd),
            'seed' : kn.unpack(filename)['seed'],
            'treat' : kn.unpack(filename)['treat'],
            '_data_hathash_hash' : fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash' : fsh.FilesHash(
                                                file_parcel="full_parcel",
                                                files_join="cat_join"
                                            ).hash_files([sys.argv[0]]),
            '_source_hash' :kn.unpack(filename)['_source_hash'],
            'ext' : '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())
Example #24
import numpy as np
import h5py
import sys
from tqdm import tqdm
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
from keyname import keyname as kn
from fileshash import fileshash as fsh
from joblib import delayed, Parallel
from natsort import natsorted

filenames = sys.argv[1:]

# check all data is from same software source
assert len({kn.unpack(filename)['_source_hash'] for filename in filenames}) == 1

def CalcCountShare(filename):
    file = h5py.File(filename, 'r')
    zips = zip(*(
        zip(
            np.array(file['Live'][upd_key]).flatten(),
            (sum(z) for z in zip(*(
                np.array(
                    file['ResourceContributed']['dir_'+str(dir)][upd_key]
                    ).flatten()
                for dir in range(5)
            ))),
        ) for upd_key in natsorted([key for key in file['Live']])[-16:]
    ))
    proportion = sum([
Example #25
def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    channel = np.array(
        file['Channel']['lev_'+str(nlev-1)]['upd_'+str(upd)]
    ).flatten()
    regulators = [
        np.array(
            file['Regulators']['dir_'+str(dir)]['upd_'+str(upd)]
        ).flatten()
        for dir in range(4)
    ]
    live = np.array(file['Live']['upd_'+str(upd)])
    index = np.array(file['Index']['own'])

    data_0 = np.array(file['Channel']['lev_0']['upd_'+str(upd)])
    data_1 = (
        np.array(file['Channel']['lev_0']['upd_'+str(upd)])
        if nlev == 1 else
        np.array(file['Channel']['lev_1']['upd_'+str(upd)])
    )

    # get unique group IDs
    ids = set(channel.flatten())

    # for each group, get all regulators
    cmapper = {}
    for id in ids:
        tags_to_regs = []
        idxs = []
        for flat_idx, idx in enumerate(index.flatten()):
            if channel[flat_idx] == id:
                idxs.append(idx)
                if live.flatten()[flat_idx]:
                    archives = [ json.loads(
                        regulator[flat_idx].decode("utf-8")
                    )['value0'] for regulator in regulators ]
                    tags = {
                        d['key'] : d['value']['value0']['value0']
                        for d in archives[0]['tags']
                    }

                    regulatorsum = defaultdict(lambda: 0.0)
                    for archive in archives:
                        for d in archive['regulators']:
                            regulatorsum[d['key']] += d['value']

                    tags_to_regs.append({
                        tags[uid] : regulatorsum[uid] for uid in archives[0]['uids']
                    })


        df = pd.DataFrame.from_records(
            tags_to_regs
        ).fillna(1)

        if pcamapper[id] is not None:
            pca, cols, minv, ptpv = pcamapper[id]
            pc = pca.transform(df[cols].to_numpy())
            pc = (pc - minv) / ptpv

            for idx, row in zip(idxs, pc):
                cmapper[idx] = (
                    row[0] if row.size >= 1 and not np.isnan(row[0]) else 0.5,
                    row[1] if row.size >= 2 and not np.isnan(row[1]) else 0.5,
                    row[2] if row.size >= 3 and not np.isnan(row[2]) else 0.5,
                )
        else:
            for idx in idxs:
                cmapper[idx] = (0.5, 0.5, 0.5)

    image = np.array([
        [
            cmapper[val_index] if val_live else (0.0,0.0,0.0)
            for val_index, val_live in zip(row_index, row_live)
        ]
    for row_index, row_live in zip(index, live)])

    plt.figure(figsize=(18,18))

    plt.imshow(
        image,
        extent = (0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    plt.grid(b=None)

    lines_0 = LineCollection([
        ((x,y), dest)
        for x in range(image.shape[0])
        for y in range(image.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_0[y][x] != data_0[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='white')
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection([
        ((x,y), dest)
        for x in range(image.shape[0])
        for y in range(image.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_1[y][x] != data_1[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='black')

    plt.gca().add_collection(lines_1)

    plt.savefig(
        kn.pack({
            'title' : 'consistent_regulator_viz',
            'update' : str(upd),
            'seed' : kn.unpack(filename)['seed'],
            'treat' : kn.unpack(filename)['treat'],
            '_data_hathash_hash' : fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash' : fsh.FilesHash(
                                                file_parcel="full_parcel",
                                                files_join="cat_join"
                                            ).hash_files([sys.argv[0]]),
            '_source_hash' :kn.unpack(filename)['_source_hash'],
            'ext' : '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())
Example #26
import numpy as np
import random
import sys

from dishpylib.pyloaders import genome_local_autoload
from keyname import keyname as kn

random.seed(1)

try:
    __, less_ops, more_ops, num_interpolation_steps = sys.argv
    num_interpolation_steps = int(num_interpolation_steps)
except ValueError:
    print(__doc__)
    sys.exit(1)

assert 'ext' in kn.unpack(less_ops)
assert 'ext' in kn.unpack(more_ops)

less_ops_data = genome_local_autoload(less_ops)
more_ops_data = genome_local_autoload(more_ops)

less_ops_num_insts = len(less_ops_data['value0']['program'])
more_ops_num_insts = len(more_ops_data['value0']['program'])

assert less_ops_num_insts == more_ops_num_insts

less_ops_num_ops = sum('Nop-' not in inst['operation']
                       for inst in less_ops_data["value0"]["program"])
more_ops_num_ops = sum('Nop-' not in inst['operation']
                       for inst in more_ops_data["value0"]["program"])
import numpy as np
import h5py
import sys
from tqdm import tqdm
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
from keyname import keyname as kn
from fileshash import fileshash as fsh
from pathlib import Path

filenames = sys.argv[1:]

# check all data is from same software source
assert len({kn.unpack(filename)['_source_hash']
            for filename in filenames}) == 1

dfs = []

for filename in tqdm(filenames):
    df = pd.read_csv(filename)
    df["seed"] = int(kn.unpack(Path(filename).parts[-2])["seed"])
    df["step"] = int(kn.unpack(Path(filename).parts[-2])["step"])
    dfs.append(df)

df = pd.concat(dfs)

df = df[df["step"] < 1050]

outfile = kn.pack({
    } for upds in upd_groups for lupds in (list(upds), )
     for obs, upd in enumerate(lupds)
     for idx, row in raw[raw['Update'] == upd].iterrows()])

ax = sns.lineplot(x="Update",
                  y="Per-Cell-Update Apoptosis Rate",
                  hue="Type",
                  data=proc)

ax.set_ylim(ymin=0.0, ymax=0.0003)

outfile = kn.pack({
    'title': 'apoptosis_series',
    'treat': kn.unpack(dataframe_filename)['treat'],
    'seed': kn.unpack(dataframe_filename)['seed'],
    '_data_hathash_hash': fsh.FilesHash().hash_files([dataframe_filename]),
    '_script_fullcat_hash': fsh.FilesHash(
        file_parcel="full_parcel",
        files_join="cat_join").hash_files([sys.argv[0]]),
    '_source_hash': kn.unpack(dataframe_filename)['_source_hash'],
    'ext': '.pdf',
})

ax.get_figure().savefig(outfile,
                        transparent=True,
import numpy as np
import h5py
import sys
from tqdm import tqdm
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
from keyname import keyname as kn
from fileshash import fileshash as fsh
from pathlib import Path

filenames = sys.argv[1:]

# check all data is from same software source
assert len({kn.unpack(filename)['_source_hash'] for filename in filenames}) == 1

dfs = []

for filename in tqdm(filenames):
    # adapted from https://stackoverflow.com/a/48135340
    try:
        df = pd.read_csv(filename)
        df["seed"] = int(kn.unpack(Path(filename).parts[-2])["seed"])
        df["step"] = int(kn.unpack(Path(filename).parts[-2])["step"])
        dfs.append(df)
    except pd.errors.EmptyDataError:
        print(filename, "is empty and has been skipped.")

df = pd.concat(dfs)
Example #30
def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    top = np.array(file['InboxTraffic']['dir_0']['upd_'+str(upd)])
    bottom = np.array(file['InboxTraffic']['dir_1']['upd_'+str(upd)])
    left = np.array(file['InboxTraffic']['dir_3']['upd_'+str(upd)])
    right = np.array(file['InboxTraffic']['dir_2']['upd_'+str(upd)])
    live = np.array(file['Live']['upd_'+str(upd)])

    data_0 = np.array(file['Channel']['lev_0']['upd_'+str(upd)])
    data_1 = (
        np.array(file['Channel']['lev_0']['upd_'+str(upd)])
        if nlev == 1 else
        np.array(file['Channel']['lev_1']['upd_'+str(upd)])
    )

    image = np.flip(np.rot90(np.transpose(np.block([
        [
            np.transpose(RenderTriangles(
                val_top,
                val_bottom,
                val_right,
                val_left,
                val_live
            )) for val_top, val_bottom, val_left, val_right, val_live in zip(
                row_top,
                row_bottom,
                row_left,
                row_right,
                row_live
            )
        ]
        for row_top, row_bottom, row_left, row_right, row_live
        in zip(
            top,
            bottom,
            left,
            right,
            live
        )
    ])),k=1),axis=0)

    plt.figure(figsize=(18,18))

    plt.imshow(
        image,
        extent = (0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    plt.grid(b=None)

    rescale = lambda coord: [v * 42 for v in coord]
    lines_0 = LineCollection([
        [ rescale(coord) for coord in ((x,y), dest) ]
        for x in range(data_0.shape[0])
        for y in range(data_0.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_0[y][x] != data_0[dest[1]-1][dest[0]-1]
    ], linestyle=(0, (1, 3)), colors='0.5')
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection([
        [ rescale(coord) for coord in ((x,y), dest) ]
        for x in range(data_1.shape[0])
        for y in range(data_1.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_1[y][x] != data_1[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='black')
    plt.gca().add_collection(lines_1)

    plt.savefig(
        kn.pack({
            'title' : 'directional_messaging_viz',
            'update' : str(upd),
            'seed' : kn.unpack(filename)['seed'],
            'treat' : kn.unpack(filename)['treat'],
            '_data_hathash_hash' : fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash' : fsh.FilesHash(
                                                file_parcel="full_parcel",
                                                files_join="cat_join"
                                            ).hash_files([sys.argv[0]]),
            '_source_hash' :kn.unpack(filename)['_source_hash'],
            'ext' : '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())