Code Example #1
import gzip
import json
import re
import tempfile

import boto3
from keyname import keyname as kn


def genome_s3_autoload(s3_url):

    s3 = boto3.client('s3')

    bucket = re.search('s3://(.+?)/', s3_url).group(1)
    object_name = re.search(f's3://{bucket}/(.+)', s3_url).group(1)

    with tempfile.NamedTemporaryFile(mode='w+b') as temp:
        print('temporary file', temp.name)

        s3.download_fileobj(bucket, object_name, temp)
        temp.flush()  # ensure downloaded bytes are on disk before re-reading

        if kn.unpack(s3_url)['ext'] == '.json':
            with open(temp.name, 'r') as f:
                return json.load(f)

        elif kn.unpack(s3_url)['ext'] == '.json.gz':
            try:
                with gzip.open(temp.name, 'rb') as f:
                    return json.loads(f.read().decode('ascii'))
            except Exception:
                pass

            try:
                with gzip.open(temp.name, 'rb') as f:
                    return json.loads(f.read().decode('utf-8'))
            except Exception:
                pass

    raise ValueError(f'cannot load genome from {s3_url}')
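A hypothetical call, assuming boto3 can find AWS credentials; the URL is illustrative, and the 'value0'/'program' genome schema is the one referenced in Code Example #26 below:

genome = genome_s3_autoload(
    's3://my-bucket/genomes/variation=master+ext=.json.gz'  # illustrative URL
)
print(len(genome['value0']['program']))  # instruction count under that schema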
Code Example #2
import sys

import h5py
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import LineCollection

from fileshash import fileshash as fsh
from keyname import keyname as kn


def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    death = np.array(file['Death']['upd_' + str(upd)])
    live = np.array(file['Live']['upd_' + str(upd)])

    image = np.array([
        [
            # dead
            (0.0, 0.0, 0.0) if not val_live else {
                0: (1.0, 1.0, 1.0),  # alive
                1: (0.0, 1.0, 0.0),  # apoptosis
                2: (1.0, 0.0, 0.0),  # bankrupt
                3: (0.0, 0.0, 1.0),  # replaced
            }[val_death] for val_death, val_live in zip(row_death, row_live)
        ] for row_death, row_live in zip(death, live)
    ])

    plt.figure(figsize=(18, 18))

    plt.imshow(image, extent=(0, image.shape[1], image.shape[0], 0))

    plt.axis('off')
    plt.grid(False)

    lines = LineCollection([((x, y), dest) for x in range(image.shape[0])
                            for y in range(image.shape[1])
                            for dest in ((x + 1, y), (x, y + 1))],
                           linestyle='solid',
                           colors='black')
    plt.gca().add_collection(lines)

    plt.savefig(
        kn.pack({
            'title': 'death_viz',
            'update': str(upd),
            'seed': kn.unpack(filename)['seed'],
            'treat': kn.unpack(filename)['treat'],
            '_data_hathash_hash': fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash': fsh.FilesHash(
                file_parcel="full_parcel",
                files_join="cat_join",
            ).hash_files([sys.argv[0]]),
            '_source_hash': kn.unpack(filename)['_source_hash'],
            'ext': '.png',
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0,
    )

    plt.clf()
    plt.close(plt.gcf())
Code Example #3
import sys

import h5py
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import LineCollection

from fileshash import fileshash as fsh
from keyname import keyname as kn


def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    live = np.array(file['Live']['upd_' + str(upd)])
    pop = np.array(file['Population']['upd_' + str(upd)])
    triggers = np.array(file['Triggers']['upd_' + str(upd)])

    image = np.array([[(0.0, 0.0, 0.0) if not val_live else (1.0, 1.0, 1.0)
                       for val_live in row_live] for row_live in live])

    plt.figure(figsize=(18, 18))

    plt.imshow(image, extent=(0, image.shape[1], image.shape[0], 0))

    plt.axis('off')
    plt.grid(False)

    lines = LineCollection(
        [((x, y), dest) for x in range(image.shape[0])
         for y in range(image.shape[1]) for dest in ((x + 1, y), (x, y + 1))
         if (pop[y][x] != pop[dest[1] - 1][dest[0] - 1]
             or triggers[y][x] != triggers[dest[1] - 1][dest[0] - 1])],
        linestyle='solid',
        colors='red')
    plt.gca().add_collection(lines)

    plt.savefig(
        kn.pack({
            'title': 'death_viz',
            'update': str(upd),
            'seed': kn.unpack(filename)['seed'],
            'treat': kn.unpack(filename)['treat'],
            '_data_hathash_hash': fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash': fsh.FilesHash(
                file_parcel="full_parcel",
                files_join="cat_join",
            ).hash_files([sys.argv[0]]),
            '_source_hash': kn.unpack(filename)['_source_hash'],
            'ext': '.png',
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0,
    )

    plt.clf()
    plt.close(plt.gcf())
Code Example #4
import re

from iterpop import iterpop as ip  # assumed source of 'ip.popsingleton'
from keyname import keyname as kn
from scipy import stats


def get_critical_sites(variant_df, control_fits_df):

    # count competitions where both strains went extinct simultaneously
    # as 0 Fitness Differential
    na_rows = variant_df['Fitness Differential'].isna()
    assert all(variant_df[na_rows]['Population Extinct'])
    variant_df['Fitness Differential'].fillna(
        0,
        inplace=True,
    )

    res = {}
    for series in variant_df['Competition Series'].unique():

        series_df = variant_df[variant_df['Competition Series'] == series]

        wt_vs_variant_df = series_df[
            series_df['genome variation'] != 'master'].reset_index()

        h0_fit = ip.popsingleton(
            control_fits_df[control_fits_df['Series'] == series].to_dict(
                orient='records', ))

        # calculate the probability of observing fitness differential result
        # under control data distribution
        if len(wt_vs_variant_df):
            wt_vs_variant_df['p'] = wt_vs_variant_df.apply(
                lambda row: stats.t.cdf(
                    row['Fitness Differential'],
                    h0_fit['Fit Degrees of Freedom'],
                    loc=h0_fit['Fit Loc'],
                    scale=h0_fit['Fit Scale'],
                ),
                axis=1,
            )
        else:
            # special case for an empty dataframe
            # to prevent an exception
            wt_vs_variant_df['p'] = []

        p_thresh = 1.0 / 100
        less_fit_variants = wt_vs_variant_df[wt_vs_variant_df['p'] < p_thresh]

        variation_attrs = [
            kn.unpack(kn.promote(variation), source_attr=False)
            for variation in less_fit_variants['genome variation']
        ]

        assert all('Nop-' in ip.popsingleton(attrs.values())
                   for attrs in variation_attrs)

        critical_idxs = [
            int(re.search(r'^i(\d+)$', ip.popsingleton(attrs.keys())).group(1))
            for attrs in variation_attrs
        ]

        res[series] = set(critical_idxs)

    return res
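Each p value above is the lower-tail probability of an observed fitness differential under the Student's t distribution fitted to the control competitions; a minimal standalone sketch of that test (fit parameters here are illustrative):

from scipy import stats

p = stats.t.cdf(
    -3.2,  # observed 'Fitness Differential'
    10.0,  # 'Fit Degrees of Freedom'
    loc=0.0,  # 'Fit Loc'
    scale=1.0,  # 'Fit Scale'
)
print(p < 1.0 / 100)  # True: flagged as significantly less fit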
Code Example #5
import gzip
import json

from keyname import keyname as kn


def genome_local_autoload(target):
    if kn.unpack(target)['ext'] == '.json':
        with open(target, 'r') as f:
            return json.load(f)

    elif kn.unpack(target)['ext'] == '.json.gz':
        try:
            with gzip.open(target, 'rb') as f:
                return json.loads(f.read().decode('ascii'))
        except Exception:
            pass

        try:
            with gzip.open(target, 'rb') as f:
                return json.loads(f.read().decode('utf-8'))
        except Exception:
            pass

    raise ValueError(f'cannot load genome from {target}')
Code Example #6
import sys

import pandas as pd
from iterdub import iterdub as ib  # assumed source of 'ib.dub'
from keyname import keyname as kn


def make_output_filename():

    df = pd.DataFrame.from_records(
        [kn.unpack(source_filename) for source_filename in sys.argv[1:]], )

    out_attrs = {
        column: ib.dub(df[column])
        for column in df.columns if column not in {
            '_',
            'ext',
        }
    }

    out_filename = kn.pack({
        **out_attrs,
        **{
            'concat': str(len(sys.argv) - 1),
            'ext': '.csv',
        },
    })

    return out_filename
Code Example #7
from keyname import keyname as kn
import sys
import pandas as pd

filenames = sys.argv[1:]

dataframes = []

for filename in filenames:
    df = pd.read_csv(filename)

    for k, v in kn.unpack(filename).items():
        if k not in ['_', 'ext']:
            df[k] = v

    dataframes.append(df)

res = pd.concat(dataframes, ignore_index=True)

res.to_csv("consolidated.csv", index=False)
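For context, keyname filenames embed key=value attributes in the filename itself, which is what the annotation loop above exploits; a minimal round-trip sketch (attribute names are illustrative):

from keyname import keyname as kn

fname = kn.pack({'seed': '1', 'treat': 'control', 'ext': '.csv'})
attrs = kn.unpack(fname)
assert attrs['seed'] == '1' and attrs['treat'] == 'control'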
Code Example #8
import numpy as np
import sys
from tqdm import tqdm
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
from keyname import keyname as kn
from fileshash import fileshash as fsh
from joblib import delayed, Parallel

upd = int(sys.argv[1])
filenames = sys.argv[2:]

# check all data is from same software source
assert len({kn.unpack(filename)['_source_hash'] for filename in filenames}) == 1

def LoadRelevantData(filename):
    return pd.concat([
        chunk[chunk['update'] == upd]
        for chunk in pd.read_csv(filename, iterator=True, chunksize=2048)
    ])

def SafeLoadRelevantData(filename):
    try:
        res = LoadRelevantData(filename)
        if not len(res):
            raise ValueError("update " + str(upd) + " not found")
        return res
    except Exception:
        print(f"warning: corrupt or incomplete data file {filename}... skipping")
        return None
Code Example #9
        np.max(egv_cvals),
        "Eigenvector Centrality Variance": np.var(egv_cvals),
        "Maximum Load Centrality": np.max(load_cvals),
        "Median Load Centrality": np.median(load_cvals),
        "Load Centrality Variance": np.var(load_cvals),
        "Minimum Spanning Weight":
            nx.minimum_spanning_tree(G.to_undirected()).size(weight='weight'),
    })

outfile = kn.pack({
    'title': kn.unpack(dataframe_filename)['title'] + "-stats",
    'bitweight': kn.unpack(dataframe_filename)['bitweight'],
    'seed': kn.unpack(dataframe_filename)['seed'],
    '_data_hathash_hash': fsh.FilesHash().hash_files([dataframe_filename]),
    '_script_fullcat_hash': fsh.FilesHash(
        file_parcel="full_parcel",
        files_join="cat_join",
    ).hash_files([sys.argv[0]]),
    # '_source_hash': kn.unpack(dataframe_filename)['_source_hash'],
    'ext': '.csv',
})
pd.DataFrame.from_records(out).to_csv(outfile, index=False)
print("output saved to", outfile)
Code Example #10
import sys

import matplotlib
import pandas as pd
from keyname import keyname as kn

matplotlib.rcParams['pdf.fonttype'] = 42

df_key = pd.read_csv(sys.argv[1])

dataframe_filenames = sys.argv[2:]

dfs = [(filename, pd.read_csv(filename)) for filename in dataframe_filenames]

print("Data loaded!")

res = []
for filename, df in dfs:

    df = df.groupby(['Genome Position']).mean().reset_index()

    for k, v in kn.unpack(filename).items():
        df[k] = v

    res.append(df)

df_data = pd.concat(res)

df_data['Slug'] = df_data['metric-slug']

key = {
    row['Slug']: {col: val for col, val in row.items() if col != 'Slug'}
    for idx, row in df_key.iterrows()
}
Code Example #11
ax.set(
    ylim=(0,
          df[df["Cause"] == "Apoptosis"]["Per-Cell-Update Death Rate"].max() *
          1.1))
plt.xticks(rotation=-90)

outfile = kn.pack({
    'title': 'apoptosis',
    '_data_hathash_hash': fsh.FilesHash().hash_files([dataframe_filename]),
    '_script_fullcat_hash': fsh.FilesHash(
        file_parcel="full_parcel",
        files_join="cat_join",
    ).hash_files([sys.argv[0]]),
    '_source_hash': kn.unpack(dataframe_filename)['_source_hash'],
    'ext': '.pdf',
})

ax.get_figure().savefig(outfile,
                        transparent=True,
                        bbox_inches='tight',
                        pad_inches=0)

print('Output saved to', outfile)

plt.clf()

ax = sns.barplot(
    x="Treatment",
Code Example #12
import h5py
import numpy as np
import sys
import scipy.stats as stats
from tqdm import tqdm
import os
import pandas as pd
from keyname import keyname as kn
from fileshash import fileshash as fsh
from joblib import delayed, Parallel

first_update = int(sys.argv[1])
last_update = int(sys.argv[2])
filenames = sys.argv[3:]

# check all data is from same software source
assert len({kn.unpack(filename)['_source_hash']
            for filename in filenames}) == 1


def CalcSurroundedRate(filename):
    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))
    return np.mean([
        sum(ro[idx] != -1 for ro in ros) for ch, pc, dirs, live, ros in [(
            np.array(file['Channel']['lev_' +
                                     str(nlev - 1)]['upd_' +
                                                    str(upd)]).flatten(),
            np.array(file['PrevChan']['upd_' + str(upd)]).flatten(), [
                np.array(file['Index'][dir_key]).flatten()
                for dir_key in file['RepOutgoing']
            ], np.array(file['Live']['upd_' + str(upd)]).flatten(), [
Code Example #13
import os
import sys
from tqdm import tqdm
import h5py
import numpy as np
import pandas as pd
from keyname import keyname as kn
from fileshash import fileshash as fsh
import re
from collections import Counter, defaultdict
from joblib import delayed, Parallel
import json

num_files = int(sys.argv[1])
filenames = sys.argv[2:num_files+2]
updates = [int(v) for v in sys.argv[num_files+2:]]

# check all data is from same software source
assert len({kn.unpack(filename)['_source_hash'] for filename in filenames}) == 1

def GroupShape(filename):

    file = h5py.File(filename, 'r')
    indices = {
        idx : i
        for i, idx in enumerate(np.array(file['Index']['own']).flatten())
    }
    neighs = [
        np.array(file['Index']['dir_'+str(dir)]).flatten()
        for dir in range(4)
    ]
    nlev = int(file.attrs.get('NLEV'))

    res = defaultdict(lambda: [0 for __ in range(5)])
Code Example #14
import json
import sys
import warnings

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.collections import LineCollection
from sklearn.decomposition import PCA

from fileshash import fileshash as fsh
from keyname import keyname as kn

# RenderTriangles is a project-local helper assumed to be in scope


def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    channel = np.array(
        file['Channel']['lev_'+str(nlev-1)]['upd_'+str(upd)]
    ).flatten()
    regulator = [
        np.array(
            file['Regulators']['dir_'+str(dir)]['upd_'+str(upd)]
        ).flatten()
        for dir in range(4)
    ]
    decoder = np.array(
        file['Regulators']['decoder']['upd_'+str(upd)]
    ).flatten()
    live = np.array(file['Live']['upd_'+str(upd)])
    index = np.array(file['Index']['own'])

    data_0 = np.array(file['Channel']['lev_0']['upd_'+str(upd)])
    data_1 = (
        np.array(file['Channel']['lev_0']['upd_'+str(upd)])
        if nlev == 1 else
        np.array(file['Channel']['lev_1']['upd_'+str(upd)])
    )

    # get unique group IDs
    ids = { id for id in channel.flatten() }

    # for each group, get all functions
    cmapper = [ {} for dir in range(4) ]
    for id in ids:
        tags_to_regs = []
        idxs = []
        dirs = []
        for flat_idx, idx in enumerate(index.flatten()):
            if channel[flat_idx] == id:
                for dir in range(4):
                    idxs.append( idx )
                    dirs.append( dir )
                    if live.flatten()[flat_idx]:
                        archive = json.loads(
                            decoder[regulator[dir][flat_idx]].decode("utf-8")
                        )['value0']
                        tags = {
                            d['key'] : d['value']['value0']['value0']
                            for d in archive['tags']
                        }
                        regulators = {
                            d['key'] : d['value']
                            for d in archive['regulators']
                        }
                        tags_to_regs.append({
                            tags[uid] : regulators[uid]["state"] for uid in archive['uids']
                        })

        df = pd.DataFrame.from_records(tags_to_regs).fillna(1)

        n = min(3, len(df.columns), len(df))
        if n:
            pca = PCA(n_components=n)

            pc = None
            with warnings.catch_warnings():
                # ignore sklearn and divide by zero warnings
                # (we handle them below)
                warnings.simplefilter("ignore")
                pc = pca.fit_transform(df.to_numpy())
                pc = (pc - pc.min(0)) / pc.ptp(0)

            for idx, dir, row in zip(idxs, dirs, pc):
                cmapper[dir][idx] = (
                    row[0] if row.size >= 1 and not np.isnan(row[0]) else 0.5,
                    row[1] if row.size >= 2 and not np.isnan(row[1]) else 0.5,
                    row[2] if row.size >= 3 and not np.isnan(row[2]) else 0.5,
                )
        else:
            for idx, dir in zip(idxs, dirs):
                cmapper[dir][idx] = (0.5, 0.5, 0.5)

    image = np.flip(np.rot90(np.transpose(np.block([
        [
            np.transpose(RenderTriangles(
                cmapper[0][val_index],
                cmapper[1][val_index],
                cmapper[2][val_index],
                cmapper[3][val_index],
                val_live
            )) for val_live, val_index in zip(row_live, row_index)
        ]
        for row_live, row_index
        in zip(live, index)
    ])),k=1),axis=0)

    plt.figure(figsize=(18,18))

    plt.imshow(
        image,
        extent = (0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    plt.grid(False)

    rescale = lambda coord: [v * 42 for v in coord]
    lines_0 = LineCollection([
        [ rescale(coord) for coord in ((x,y), dest) ]
        for x in range(index.shape[0])
        for y in range(index.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_0[y][x] != data_0[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='white', linewidths=(2,))
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection([
        [ rescale(coord) for coord in ((x,y), dest) ]
        for x in range(index.shape[0])
        for y in range(index.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_1[y][x] != data_1[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='black', linewidths=(2,))
    plt.gca().add_collection(lines_1)

    plt.savefig(
        kn.pack({
            'title' : 'directional_regulator_viz',
            'update' : str(upd),
            'seed' : kn.unpack(filename)['seed'],
            'treat' : kn.unpack(filename)['treat'],
            '_data_hathash_hash' : fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash' : fsh.FilesHash(
                                                file_parcel="full_parcel",
                                                files_join="cat_join"
                                            ).hash_files([sys.argv[0]]),
            '_source_hash' :kn.unpack(filename)['_source_hash'],
            'ext' : '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())
Code Example #15
File: collate.py  Project: perryk12/dishtiny
import re
import sys

from keyname import keyname as kn

try:
    prefix_url, regex = sys.argv[1:]
except ValueError:
    print('bad arguments')
    print('USAGE: [prefix_url] [regex]')
    sys.exit(1)

bucket = re.search('s3://(.+?)/', prefix_url).group(1)
prefix = re.search(f's3://{bucket}/(.+)', prefix_url).group(1)

print(f'prefix_url {prefix_url}')
print(f'regex {regex}')
print(f'bucket {bucket}')
print(f'prefix {prefix}')

assert prefix.count('stage=') == 1 and prefix.count('what=') == 1
assert any('stage' in kn.unpack(segment) and 'what' in kn.unpack(segment)
           for segment in prefix.split('/'))

stages, = [
    list(kn.unpack(segment)['stage'].split('~'))
    for segment in prefix.split('/')
    if 'stage' in kn.unpack(segment) and 'what' in kn.unpack(segment)
]

print(f'stages {stages}')

prefixes = [
    '/'.join(
        kn.pack({
            **kn.unpack(segment),
            **{
Code Example #16
File: collate.py  Project: mmore500/dishtiny
import re
import sys

from keyname import keyname as kn

try:
    prefix_url, regex = sys.argv[1:]
except ValueError:
    print(__doc__)
    sys.exit(1)

bucket = re.search('s3://(.+?)/', prefix_url).group(1)
prefix = re.search(f's3://{bucket}/(.+)', prefix_url).group(1)

print(f'prefix_url {prefix_url}')
print(f'regex {regex}')
print(f'bucket {bucket}')
print(f'prefix {prefix}')

assert prefix.count('stage=') == 1 and prefix.count('what=') == 1
assert any(
    'stage' in kn.unpack(segment) and 'what' in kn.unpack(segment)
    for segment in prefix.split('/')
)

stages, = [
    list(kn.unpack(segment)['stage'].split('~'))
    for segment in prefix.split('/')
    if 'stage' in kn.unpack(segment) and 'what' in kn.unpack(segment)
]

print(f'stages {stages}')

prefixes = [ '/'.join(
    kn.pack({
        **kn.unpack(segment),
        **{ 'stage' : stage, },
Code Example #17
g = sns.barplot(
    data=df_data,
    x='Metric',
    y='Match Distance',
    order=sorted(['Hamming', 'Hash', 'Integer', 'Streak', 'Integer (bi)']),
)
g.set(ylim=(0, 1))
g.set_xticklabels(g.get_xticklabels(), rotation=90)

plt.gcf().set_size_inches(3.75, 2.75)

outfile = kn.pack({
    'title': 'dimensionality_barplot',
    'bitweight': kn.unpack(dataframe_filename)['bitweight'],
    'seed': kn.unpack(dataframe_filename)['seed'],
    '_data_hathash_hash': fsh.FilesHash().hash_files([dataframe_filename]),
    '_script_fullcat_hash': fsh.FilesHash(
        file_parcel="full_parcel",
        files_join="cat_join",
    ).hash_files([sys.argv[0]]),
    # '_source_hash': kn.unpack(dataframe_filename)['_source_hash'],
    'ext': '.pdf',
})
plt.savefig(outfile, transparent=True, bbox_inches='tight', pad_inches=0)
print("output saved to", outfile)

plt.clf()
Code Example #18
])['Elapsed Generations', ].diff(periods=-1)

df['Generations Per Update'] = df['Elapsed Generations Delta'] / df['Update']

################################################################################
print()
print('calculating upload path')
print('---------------------------------------------------------------------')
################################################################################

# common_keys = set.intersection(*[
#     set( kn.unpack(source).keys() )
#     for source in sources
# ])

out_filename = kn.pack(kn.unpack(data_url.key), )

out_prefix = f'endeavor={endeavor}/thread-profiles/stage=8+what=elaborated/'
out_path = out_prefix + out_filename

print(f'upload path will be s3://{bucket}/{out_path}')

################################################################################
print()
print('dumping and uploading')
print('---------------------------------------------------------------------')
################################################################################

# have to work with filename or pandas compression doesn't work
with tempfile.TemporaryDirectory() as tmp:
Code Example #19
import json
import sys
from collections import defaultdict

import pandas as pd
from frozendict import frozendict
from keyname import keyname as kn


def load_json(filename):
    with open(filename) as json_file:
        data = json.load(json_file)
    return data


res = defaultdict(list)

for filename, entry in [(filename, load_json(filename))
                        for filename in sys.argv[1:]]:
    for benchmark in entry['benchmarks']:
        res[frozendict({
            'run_type': benchmark['run_type'],
        })].append({
            'Library':
            kn.unpack(filename)['library'],
            'Implementation':
            'vanilla' if 'SignalGP' in filename else 'lite',
            'Statistic': (benchmark['aggregate_name']
                          if 'aggregate_name' in benchmark else 'measurement'),
            'Wall Nanoseconds':
            benchmark['real_time'],
            'CPU Nanoseconds':
            benchmark['cpu_time'],
            'num agents':
            benchmark['num agents'],
        })

for run_specs, rows in res.items():
    pd.DataFrame(rows).to_csv(
        kn.pack({
Code Example #20
import sys
from collections import defaultdict

import h5py
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import LineCollection

from fileshash import fileshash as fsh
from keyname import keyname as kn

# NONE, P_CHILD, P_PARENT, and RenderTriangles are project-local
# constants/helpers assumed to be in scope


def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    own = np.array(file['Index']['own']).flatten()
    dirs = {
        'top': np.array(file['Index']['dir_0']).flatten(),
        'bottom': np.array(file['Index']['dir_1']).flatten(),
        'left': np.array(file['Index']['dir_3']).flatten(),
        'right': np.array(file['Index']['dir_2']).flatten(),
    }

    chans = [
        np.array(file['Channel']['lev_' + str(lev)]['upd_' +
                                                    str(upd)]).flatten()
        for lev in range(nlev)
    ]
    cage = np.array(file['CellAge']['upd_' + str(upd)]).flatten()
    pvch = np.array(file['PrevChan']['upd_' + str(upd)]).flatten()
    ppos = np.array(file['ParentPos']['upd_' + str(upd)]).flatten()

    live = np.array(file['Live']['upd_' + str(upd)])

    data_0 = np.array(file['Channel']['lev_0']['upd_' + str(upd)])
    data_1 = (np.array(file['Channel']['lev_0']['upd_' + str(upd)]) if nlev
              == 1 else np.array(file['Channel']['lev_1']['upd_' + str(upd)]))

    res = defaultdict(dict)
    for idx in range(own.size):
        for dir, drct in dirs.items():
            type = NONE
            if pvch[idx] == chans[-1][drct[idx]]:
                type = P_CHILD
            elif pvch[drct[idx]] == chans[-1][idx]:
                type = P_PARENT
            else:
                # grayscale channel ID
                type = (chans[-1][idx] / 2**64) * 0.8

            res[own[idx]][dir] = type

    own = np.array(file['Index']['own'])
    live = np.array(file['Live']['upd_' + str(upd)])

    image = np.flip(np.rot90(np.transpose(
        np.block([[
            np.transpose(
                RenderTriangles(res[val_own]['top'], res[val_own]['bottom'],
                                res[val_own]['right'], res[val_own]['left'],
                                val_live))
            for val_own, val_live in zip(row_own, row_live)
        ] for row_own, row_live in zip(own, live)])),
                             k=1),
                    axis=0)

    plt.figure(figsize=(18, 18))

    plt.imshow(image, extent=(0, image.shape[1], image.shape[0], 0))

    plt.axis('off')
    plt.grid(False)

    rescale = lambda coord: [v * 42 for v in coord]
    lines_0 = LineCollection(
        [[rescale(coord) for coord in ((x, y), dest)]
         for x in range(data_0.shape[0]) for y in range(data_0.shape[1])
         for dest in ((x + 1, y), (x, y + 1))
         if data_0[y][x] != data_0[dest[1] - 1][dest[0] - 1]],
        linestyle=(0, (1, 3)),
        colors='0.5')
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection(
        [[rescale(coord) for coord in ((x, y), dest)]
         for x in range(data_1.shape[0]) for y in range(data_1.shape[1])
         for dest in ((x + 1, y), (x, y + 1))
         if data_1[y][x] != data_1[dest[1] - 1][dest[0] - 1]],
        linestyle='solid',
        colors='black')
    plt.gca().add_collection(lines_1)

    plt.savefig(
        kn.pack({
            'title': 'directional_propagule_viz',
            'update': str(upd),
            'seed': kn.unpack(filename)['seed'],
            'treat': kn.unpack(filename)['treat'],
            '_data_hathash_hash': fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash': fsh.FilesHash(
                file_parcel="full_parcel",
                files_join="cat_join",
            ).hash_files([sys.argv[0]]),
            '_source_hash': kn.unpack(filename)['_source_hash'],
            'ext': '.png',
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0,
    )

    plt.clf()
    plt.close(plt.gcf())
Code Example #21
    return np.mean([
        1 if apop[idx] == 2 else 0
        for apop in [np.array(file['Apoptosis']['upd_' + str(upd)]).flatten()]
        for idx in range(file['Index']['own'].size)
    ])


def ExtractUpdates(file):
    return [int(re.findall(r'\d+', key)[0]) for key in file['Apoptosis'].keys()]


outfile = kn.pack({
    'title': 'apoptosis_series',
    'seed': kn.unpack(filename)['seed'],
    'treat': kn.unpack(filename)['treat'],
    '_data_fullcat_hash': fsh.FilesHash(
        file_parcel="full_parcel",
        files_join="cat_join",
    ).hash_files([filename]),
    '_script_fullcat_hash': fsh.FilesHash(
        file_parcel="full_parcel",
        files_join="cat_join",
    ).hash_files([sys.argv[0]]),
    '_source_hash': kn.unpack(filename)['_source_hash'],
    'ext': '.csv',
})

pd.DataFrame.from_dict([{
Code Example #22
import sys

import h5py
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import LineCollection

from fileshash import fileshash as fsh
from keyname import keyname as kn


def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    # display current stockpile AND inbound resource
    stock = np.array(file['Stockpile']['upd_'+str(upd)])
    share = np.array(file['TotalContribute']['upd_'+str(upd)])
    live = np.array(file['Live']['upd_'+str(upd)])

    data_0 = np.array(file['Channel']['lev_0']['upd_'+str(upd)])
    data_1 = (
        np.array(file['Channel']['lev_0']['upd_'+str(upd)])
        if nlev == 1 else
        np.array(file['Channel']['lev_1']['upd_'+str(upd)])
    )

    image = np.array([
        [
            # dead
            (0.0, 0.0, 0.0)
            if not val_live else
            # enough resource to reproduce (green to yellow)
            (
                min(1.0, (val_stock + val_share) - 1.0),
                1.0,
                0.0
            ) if val_stock + val_share > 1.0 else
            # not yet enough resource to reproduce (blue)
            (
                1.0 - (val_stock + val_share),
                1.0 - (val_stock + val_share),
                1.0
            ) if val_stock + val_share > 0.0 else
            # negative resource (red)
            (
                1.0,
                max(0.0, 1.0 + (val_stock + val_share) / 1.25),
                max(0.0, 1.0 + (val_stock + val_share) / 1.25)
            )
            for val_stock, val_share, val_live in zip(row_stock, row_share, row_live)
        ]
    for row_stock, row_share, row_live in zip(stock, share, live)])

    plt.figure(figsize=(18,18))

    plt.imshow(
        image,
        extent = (0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    plt.grid(False)

    lines_0 = LineCollection([
        ((x,y), dest)
        for x in range(image.shape[0])
        for y in range(image.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_0[y][x] != data_0[dest[1]-1][dest[0]-1]
    ], linestyle=(0, (1, 3)), colors='0.5')
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection([
        ((x,y), dest)
        for x in range(image.shape[0])
        for y in range(image.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_1[y][x] != data_1[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='black')
    plt.gca().add_collection(lines_1)

    plt.savefig(
        kn.pack({
            'title' : 'stockpile_viz',
            'update' : str(upd),
            'seed' : kn.unpack(filename)['seed'],
            'treat' : kn.unpack(filename)['treat'],
            '_data_hathash_hash' : fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash' : fsh.FilesHash(
                                                file_parcel="full_parcel",
                                                files_join="cat_join"
                                            ).hash_files([sys.argv[0]]),
            '_source_hash' :kn.unpack(filename)['_source_hash'],
            'ext' : '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())
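The nested conditional expression that builds the image above is dense; the same color mapping pulled out as a standalone helper (an equivalent sketch, not the author's code):

def stockpile_color(val_stock, val_share, val_live):
    total = val_stock + val_share
    if not val_live:
        return (0.0, 0.0, 0.0)  # dead: black
    if total > 1.0:
        # enough resource to reproduce: green shading toward yellow
        return (min(1.0, total - 1.0), 1.0, 0.0)
    if total > 0.0:
        # not yet enough resource to reproduce: white shading toward blue
        return (1.0 - total, 1.0 - total, 1.0)
    # negative resource: white shading toward red
    return (
        1.0,
        max(0.0, 1.0 + total / 1.25),
        max(0.0, 1.0 + total / 1.25),
    )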
Code Example #23
import colorsys
import sys

import h5py
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import LineCollection

from fileshash import fileshash as fsh
from keyname import keyname as kn


def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    data_0 = np.array(file['Channel']['lev_0']['upd_'+str(upd)])
    data_1 = (
        np.array(file['Channel']['lev_0']['upd_'+str(upd)])
        if nlev == 1 else
        np.array(file['Channel']['lev_1']['upd_'+str(upd)])
    )

    image = np.array([
        [
            tuple(colorsys.hsv_to_rgb(
                (val_1/2**63)%1.0,
                (val_0/2**63)%0.6 + 0.4,
                1.0
            ))
            if val_0 and val_1 else (0,0,0)
        for val_0, val_1 in zip(row_0, row_1)
        ]
    for row_0, row_1 in zip(data_0, data_1)])

    plt.figure(figsize=(18,18))

    plt.imshow(
        image,
        extent = (0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    plt.grid(False)

    lines_0 = LineCollection([
        ((x,y), dest)
        for x in range(image.shape[0])
        for y in range(image.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_0[y][x] != data_0[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='white')
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection([
        ((x,y), dest)
        for x in range(image.shape[0])
        for y in range(image.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_1[y][x] != data_1[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='black')
    plt.gca().add_collection(lines_1)

    plt.savefig(
        kn.pack({
            'title' : 'channel_viz',
            'update' : str(upd),
            'seed' : kn.unpack(filename)['seed'],
            'treat' : kn.unpack(filename)['treat'],
            '_data_hathash_hash' : fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash' : fsh.FilesHash(
                                                file_parcel="full_parcel",
                                                files_join="cat_join"
                                            ).hash_files([sys.argv[0]]),
            '_source_hash' :kn.unpack(filename)['_source_hash'],
            'ext' : '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())
Code Example #24
import h5py
import numpy as np
import sys
from tqdm import tqdm
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
from keyname import keyname as kn
from fileshash import fileshash as fsh
from joblib import delayed, Parallel
from natsort import natsorted

filenames = sys.argv[1:]

# check all data is from same software source
assert len({kn.unpack(filename)['_source_hash'] for filename in filenames}) == 1

def CalcCountShare(filename):
    file = h5py.File(filename, 'r')
    zips = zip(*(
        zip(
            np.array(file['Live'][upd_key]).flatten(),
            (sum(z) for z in zip(*(
                np.array(
                    file['ResourceContributed']['dir_'+str(dir)][upd_key]
                    ).flatten()
                for dir in range(5)
            ))),
        ) for upd_key in natsorted([key for key in file['Live']])[-16:]
    ))
    proportion = sum([
Code Example #25
import json
import sys
from collections import defaultdict

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.collections import LineCollection

from fileshash import fileshash as fsh
from keyname import keyname as kn

# pcamapper (fitted per-channel PCA transforms) is assumed to be
# defined at module level elsewhere in the script


def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    channel = np.array(
        file['Channel']['lev_'+str(nlev-1)]['upd_'+str(upd)]
    ).flatten()
    regulators = [
        np.array(
            file['Regulators']['dir_'+str(dir)]['upd_'+str(upd)]
        ).flatten()
        for dir in range(4)
    ]
    live = np.array(file['Live']['upd_'+str(upd)])
    index = np.array(file['Index']['own'])

    data_0 = np.array(file['Channel']['lev_0']['upd_'+str(upd)])
    data_1 = (
        np.array(file['Channel']['lev_0']['upd_'+str(upd)])
        if nlev == 1 else
        np.array(file['Channel']['lev_1']['upd_'+str(upd)])
    )

    # get unique group IDs
    ids = { id for id in channel.flatten() }

    # for each group, get all regulators
    cmapper = {}
    for id in ids:
        tags_to_regs = []
        idxs = []
        for flat_idx, idx in enumerate(index.flatten()):
            if channel[flat_idx] == id:
                idxs.append(idx)
                if live.flatten()[flat_idx]:
                    archives = [ json.loads(
                        regulator[flat_idx].decode("utf-8")
                    )['value0'] for regulator in regulators ]
                    tags = {
                        d['key'] : d['value']['value0']['value0']
                        for d in archives[0]['tags']
                    }

                    regulatorsum = defaultdict(lambda: 0.0)
                    for archive in archives:
                        for d in archive['regulators']:
                            regulatorsum[d['key']] += d['value']

                    tags_to_regs.append({
                        tags[uid] : regulatorsum[uid] for uid in archives[0]['uids']
                    })


        df = pd.DataFrame.from_records(
            tags_to_regs
        ).fillna(1)

        if pcamapper[id] is not None:
            pca, cols, minv, ptpv = pcamapper[id]
            pc = pca.transform(df[cols].to_numpy())
            pc = (pc - minv) / ptpv

            for idx, row in zip(idxs, pc):
                cmapper[idx] = (
                    row[0] if row.size >= 1 and not np.isnan(row[0]) else 0.5,
                    row[1] if row.size >= 2 and not np.isnan(row[1]) else 0.5,
                    row[2] if row.size >= 3 and not np.isnan(row[2]) else 0.5,
                )
        else:
            for idx in idxs:
                cmapper[idx] = (0.5, 0.5, 0.5)

    image = np.array([
        [
            cmapper[val_index] if val_live else (0.0,0.0,0.0)
            for val_index, val_live in zip(row_index, row_live)
        ]
    for row_index, row_live in zip(index, live)])

    plt.figure(figsize=(18,18))

    plt.imshow(
        image,
        extent = (0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    plt.grid(False)

    lines_0 = LineCollection([
        ((x,y), dest)
        for x in range(image.shape[0])
        for y in range(image.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_0[y][x] != data_0[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='white')
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection([
        ((x,y), dest)
        for x in range(image.shape[0])
        for y in range(image.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_1[y][x] != data_1[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='black')

    plt.gca().add_collection(lines_1)

    plt.savefig(
        kn.pack({
            'title' : 'consistent_regulator_viz',
            'update' : str(upd),
            'seed' : kn.unpack(filename)['seed'],
            'treat' : kn.unpack(filename)['treat'],
            '_data_hathash_hash' : fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash' : fsh.FilesHash(
                                                file_parcel="full_parcel",
                                                files_join="cat_join"
                                            ).hash_files([sys.argv[0]]),
            '_source_hash' :kn.unpack(filename)['_source_hash'],
            'ext' : '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())
Code Example #26
import numpy as np
import random
import sys

from dishpylib.pyloaders import genome_local_autoload
from keyname import keyname as kn

random.seed(1)

try:
    __, less_ops, more_ops, num_interpolation_steps = sys.argv
    num_interpolation_steps = int(num_interpolation_steps)
except ValueError:
    print(__doc__)
    sys.exit(1)

assert 'ext' in kn.unpack(less_ops)
assert 'ext' in kn.unpack(more_ops)

less_ops_data = genome_local_autoload(less_ops)
more_ops_data = genome_local_autoload(more_ops)

less_ops_num_insts = len(less_ops_data['value0']['program'])
more_ops_num_insts = len(more_ops_data['value0']['program'])

assert less_ops_num_insts == more_ops_num_insts

less_ops_num_ops = sum('Nop-' not in inst['operation']
                       for inst in less_ops_data["value0"]["program"])
more_ops_num_ops = sum('Nop-' not in inst['operation']
                       for inst in more_ops_data["value0"]["program"])
Code Example #27
import numpy as np
import h5py
import sys
from tqdm import tqdm
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
from keyname import keyname as kn
from fileshash import fileshash as fsh
from pathlib import Path

filenames = sys.argv[1:]

# check all data is from same software source
assert len({kn.unpack(filename)['_source_hash']
            for filename in filenames}) == 1

dfs = []

for filename in tqdm(filenames):
    df = pd.read_csv(filename)
    df["seed"] = int(kn.unpack(Path(filename).parts[-2])["seed"])
    df["step"] = int(kn.unpack(Path(filename).parts[-2])["step"])
    dfs.append(df)

df = pd.concat(dfs)

df = df[df["step"] < 1050]

outfile = kn.pack({
Code Example #28
    } for upds in upd_groups for lupds in (list(upds), )
     for obs, upd in enumerate(lupds)
     for idx, row in raw[raw['Update'] == upd].iterrows()])

ax = sns.lineplot(x="Update",
                  y="Per-Cell-Update Apoptosis Rate",
                  hue="Type",
                  data=proc)

ax.set_ylim(ymin=0.0, ymax=0.0003)

outfile = kn.pack({
    'title': 'apoptosis_series',
    'treat': kn.unpack(dataframe_filename)['treat'],
    'seed': kn.unpack(dataframe_filename)['seed'],
    '_data_hathash_hash': fsh.FilesHash().hash_files([dataframe_filename]),
    '_script_fullcat_hash': fsh.FilesHash(
        file_parcel="full_parcel",
        files_join="cat_join",
    ).hash_files([sys.argv[0]]),
    '_source_hash': kn.unpack(dataframe_filename)['_source_hash'],
    'ext': '.pdf',
})

ax.get_figure().savefig(outfile,
                        transparent=True,
Code Example #29
import numpy as np
import h5py
import sys
from tqdm import tqdm
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
from keyname import keyname as kn
from fileshash import fileshash as fsh
from pathlib import Path

filenames = sys.argv[1:]

# check all data is from same software source
assert len({kn.unpack(filename)['_source_hash'] for filename in filenames}) == 1

dfs = []

for filename in tqdm(filenames):
    # adapted from https://stackoverflow.com/a/48135340
    try:
        df = pd.read_csv(filename)
        df["seed"] = int(kn.unpack(Path(filename).parts[-2])["seed"])
        df["step"] = int(kn.unpack(Path(filename).parts[-2])["step"])
        dfs.append(df)
    except pd.errors.EmptyDataError:
        print(filename, "is empty and has been skipped.")

df = pd.concat(dfs)
Code Example #30
import sys

import h5py
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import LineCollection

from fileshash import fileshash as fsh
from keyname import keyname as kn

# RenderTriangles is a project-local helper assumed to be in scope


def RenderAndSave(upd, filename):

    file = h5py.File(filename, 'r')
    nlev = int(file.attrs.get('NLEV'))

    top = np.array(file['InboxTraffic']['dir_0']['upd_'+str(upd)])
    bottom = np.array(file['InboxTraffic']['dir_1']['upd_'+str(upd)])
    left = np.array(file['InboxTraffic']['dir_3']['upd_'+str(upd)])
    right = np.array(file['InboxTraffic']['dir_2']['upd_'+str(upd)])
    live = np.array(file['Live']['upd_'+str(upd)])

    data_0 = np.array(file['Channel']['lev_0']['upd_'+str(upd)])
    data_1 = (
        np.array(file['Channel']['lev_0']['upd_'+str(upd)])
        if nlev == 1 else
        np.array(file['Channel']['lev_1']['upd_'+str(upd)])
    )

    image = np.flip(np.rot90(np.transpose(np.block([
        [
            np.transpose(RenderTriangles(
                val_top,
                val_bottom,
                val_right,
                val_left,
                val_live
            )) for val_top, val_bottom, val_left, val_right, val_live in zip(
                row_top,
                row_bottom,
                row_left,
                row_right,
                row_live
            )
        ]
        for row_top, row_bottom, row_left, row_right, row_live
        in zip(
            top,
            bottom,
            left,
            right,
            live
        )
    ])),k=1),axis=0)

    plt.figure(figsize=(18,18))

    plt.imshow(
        image,
        extent = (0, image.shape[1], image.shape[0], 0)
    )

    plt.axis('off')
    plt.grid(False)

    rescale = lambda coord: [v * 42 for v in coord]
    lines_0 = LineCollection([
        [ rescale(coord) for coord in ((x,y), dest) ]
        for x in range(data_0.shape[0])
        for y in range(data_0.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_0[y][x] != data_0[dest[1]-1][dest[0]-1]
    ], linestyle=(0, (1, 3)), colors='0.5')
    plt.gca().add_collection(lines_0)

    lines_1 = LineCollection([
        [ rescale(coord) for coord in ((x,y), dest) ]
        for x in range(data_1.shape[0])
        for y in range(data_1.shape[1])
        for dest in ((x+1,y), (x,y+1))
        if data_1[y][x] != data_1[dest[1]-1][dest[0]-1]
    ], linestyle='solid', colors='black')
    plt.gca().add_collection(lines_1)

    plt.savefig(
        kn.pack({
            'title' : 'directional_messaging_viz',
            'update' : str(upd),
            'seed' : kn.unpack(filename)['seed'],
            'treat' : kn.unpack(filename)['treat'],
            '_data_hathash_hash' : fsh.FilesHash().hash_files([filename]),
            '_script_fullcat_hash' : fsh.FilesHash(
                                                file_parcel="full_parcel",
                                                files_join="cat_join"
                                            ).hash_files([sys.argv[0]]),
            '_source_hash' :kn.unpack(filename)['_source_hash'],
            'ext' : '.png'
        }),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0
    )

    plt.clf()
    plt.close(plt.gcf())
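A hypothetical driver for any of the RenderAndSave variants above, mirroring the joblib pattern imported by the other scripts on this page; the filename is illustrative and must carry the seed, treat, and _source_hash keys the function unpacks:

from joblib import delayed, Parallel

updates = [100, 200, 300]
Parallel(n_jobs=2)(
    delayed(RenderAndSave)(upd, 'seed=1+treat=control+_source_hash=abc123+ext=.h5')
    for upd in updates
)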