import analysisutil
import matplotlib.pyplot as plt
import numpy as np
import plotnine as pn

# Scatter comm_cost against complexity for one results table, then set up
# the grid parameters for a binned average-monotonicity plot (the binning
# loop itself falls outside this excerpt).
analysisutil.add_argument('table_name')
analysisutil.add_argument('bins', type=int)

(args, setup, file_util) = analysisutil.init(use_base_dir=True)

data = file_util.load_pandas_csv('pandas_{0}.csv'.format(args.table_name))

comm_cost = np.array(data['comm_cost'])
complexity = np.array(data['complexity'])
# NOTE(review): reads the 'naturalness' column into a variable named
# `monotonicity` — confirm this aliasing is intentional.
monotonicity = np.array(data['naturalness'])

fig = plt.figure()
plt.scatter(comm_cost, complexity)
plt.show()

# Bin sizes for an args.bins x args.bins grid spanning the observed ranges.
max_comp = max(complexity)
min_comp = min(complexity)
max_inf = max(comm_cost)
min_inf = min(comm_cost)
comp_step = (max_comp - min_comp) / args.bins
inf_step = (max_inf - min_inf) / args.bins

# Accumulators filled by the (truncated) binning loop that follows.
plot_complexity = []
plot_informativeness = []
plot_avg_monotonicity = []
import analysisutil
import plotnine as pn
from Languages import LanguageLoader

# Scatter plot of communicative cost vs. complexity for one run's language
# table, rendered with plotnine and saved through the project file utilities.
analysisutil.add_argument('complexity_strategy')
analysisutil.add_argument('informativeness_strategy')
analysisutil.add_argument('--include_natural',
                          dest='include_natural_languages',
                          default=False,
                          action='store_true')

args, setup, file_util = analysisutil.init()

table = LanguageLoader.load_pandas_table(file_util,
                                         args.complexity_strategy,
                                         args.informativeness_strategy)

fig = pn.ggplot(table, pn.aes('comm_cost', 'complexity')) + pn.geom_point()

# NOTE(review): overlaying natural-language reference points
# (args.include_natural_languages) was already commented out upstream; the
# flag is still accepted but currently has no effect.

print(fig)

file_util.save_plotnine(
    fig, '{0}_{1}_plot'.format(args.complexity_strategy,
                               args.informativeness_strategy))
import analysisutil
import matplotlib.pyplot as plt

# Histogram of per-expression monotonicity for one expression set, scoring
# each expression by the larger of its upward and downward monotonicity.
analysisutil.add_argument('set')
args, setup, file_util = analysisutil.init(use_base_dir=True)

up_scores = file_util.load_dill('monotonicities_{0}_up.dill'.format(
    args.set))
down_scores = file_util.load_dill(
    'monotonicities_{0}_down.dill'.format(args.set))

# Keep whichever direction is more monotone for every expression.
monotonicities = [max(down, up) for (down, up) in zip(down_scores, up_scores)]

fig = plt.figure()

plt.hist(monotonicities, bins=30)

plt.show()
file_util.save_figure(fig, 'monotonicity_{0}_hist'.format(args.set))
from pathos.pools import ProcessPool
import analysisutil
from Languages import LanguageLoader
from Languages.ComplexityMeasurer import WordCountComplexityMeasurer, SumComplexityMeasurer, SpecialComplexityMeasurer

# Score every generated language with the complexity strategy chosen on the
# command line, in parallel, then persist the scores.
analysisutil.add_argument('max_words', type=int)
analysisutil.add_argument('comp_strat')
args, setup, file_util = analysisutil.init()

languages = LanguageLoader.load_languages(file_util)

# Dispatch table: strategy name -> measurer factory (lazy, so only the
# requested measurer is ever constructed).
measurer_factories = {
    'wordcount': lambda: WordCountComplexityMeasurer(args.max_words),
    'wordcomplexity': lambda: SumComplexityMeasurer(args.max_words, 1),
    'special': lambda: SpecialComplexityMeasurer(args.max_words),
}
if args.comp_strat not in measurer_factories:
    raise ValueError('{0} is not a valid complexity strategy.'.format(
        args.comp_strat))
complexity_measurer = measurer_factories[args.comp_strat]()

# Fan the per-language measurements out over a process pool.
with ProcessPool(nodes=args.processes) as pool:
    complexity = pool.map(complexity_measurer, languages)

file_util.dump_dill(complexity, 'complexity_{0}.dill'.format(args.comp_strat))
# ---- Beispiel #5 ----
import os

import analysisutil
import matplotlib.pyplot as plt

# Plot and save a histogram of per-language informativeness scores for one
# informativeness strategy.
analysisutil.add_argument('informativeness_strategy')

args, setup, file_util = analysisutil.init()

scores = file_util.load_dill(
    'informativeness_{0}.dill'.format(args.informativeness_strategy))

fig = plt.figure()
plt.hist(scores)
plt.xlabel('informativeness')
plt.show()

file_util.save_figure(
    fig, 'informativeness_{0}_hist.png'.format(args.informativeness_strategy))
import itertools
import math
import random
from copy import copy

import pygmo
from pathos.multiprocessing import ProcessPool

import Generator
import analysisutil
from Languages import LanguageLoader, LanguageGenerator
from Languages.ComplexityMeasurer import SumComplexityMeasurer
from Languages.InformativenessMeasurer import SimMaxInformativenessMeasurer

# Evolutionary-search setup: sample an initial population of languages and
# build the model universe (the mutation/evolution code continues beyond
# this excerpt).
analysisutil.add_argument('lang_size', type=int)
analysisutil.add_argument('sample_size', type=int)
analysisutil.add_argument('generations', type=int)
analysisutil.add_argument('-m', '--max_mutations', type=int, default=1)
(args, setup, file_util) = analysisutil.init()

expressions = LanguageLoader.load_all_evaluated_expressions(file_util)

# sample_size is the total number of languages; divide by lang_size to get
# the per-size count expected by generate_sampled.
languages = LanguageGenerator.generate_sampled(
    expressions, args.lang_size, int(args.sample_size / args.lang_size))

# NOTE(review): args.model_size is not registered in this script —
# presumably supplied by analysisutil's shared arguments; verify.
universe = Generator.generate_simplified_models(args.model_size)


def remove(language):
    language = copy(language)
    index = random.randint(0, len(language) - 1)
# ---- Beispiel #7 ----
import analysisutil

# Select the indices of expressions whose conservativity exceeds a
# user-supplied threshold and store them for later filtering steps.
analysisutil.add_argument('threshold', type=float)

args, setup, file_util = analysisutil.init(use_base_dir=True)

# Round so the threshold embedded in the output filename stays short/stable.
threshold = round(args.threshold, 2)

conservativities = file_util.load_dill('conservativities_b.dill')
indices = {
    index
    for (index, score) in enumerate(conservativities) if score > threshold
}

file_util.dump_dill(
    indices, 'conservative_{0}_expression_indices.dill'.format(threshold))
# ---- Beispiel #8 ----
from urllib.parse import quote_plus

import analysisutil
import matplotlib.pyplot as plt

# Overlay informativeness/complexity scatter plots from several runs in a
# single figure, one colour (and legend entry) per run.
analysisutil.add_argument('complexity_strategy')
analysisutil.add_argument('informativeness_strategy')
analysisutil.add_argument('run_names', nargs='+')

args, setup, file_util = analysisutil.init(use_base_dir=True)

fig = plt.figure()

for run in args.run_names:
    xs = file_util.load_dill('{0}/informativeness_{1}.dill'.format(
        run, args.informativeness_strategy))
    ys = file_util.load_dill('{0}/complexity_{1}.dill'.format(
        run, args.complexity_strategy))
    plt.scatter(xs, ys, label=run)

plt.legend()
plt.xlabel('informativeness')
plt.ylabel('complexity')

plt.show()

file_util.save_figure(
    fig, '{0}_{1}_{2}_multirun_plot.png'.format(args.complexity_strategy,
                                                args.informativeness_strategy,
                                                '-'.join(args.run_names)))
# ---- Beispiel #9 ----
import analysisutil
import statsmodels.formula.api as smf
import plotnine as pn

# Load one aggregated results table (from the base directory) for the OLS
# regression of pareto_closeness on naturalness below.
analysisutil.add_argument('table_name')

args, setup, file_util = analysisutil.init()
file_util_base = file_util.get_base_file_util()

df = file_util_base.load_pandas_csv("pandas_{0}.csv".format(args.table_name))

# Quick sanity check of the loaded columns.
print(df.head())

def standardize(series):
    """Return *series* shifted to zero mean and scaled to unit (sample) std."""
    centered = series - series.mean()
    return centered / series.std()


#df['conservativity'] = standardize(df['conservativity'])
#df['monotonicity'] = standardize(df['monotonicity'])
#df['naturalness'] = standardize(df['naturalness'])

#plt = (pn.ggplot(df, pn.aes('naturalness', 'pareto_closeness'))
#       + pn.geom_point()
#        + pn.stat_smooth(method='lm',color='r'))
#
#print(plt)

# Ordinary least squares: does naturalness predict how close a language
# sits to the Pareto frontier?
result = smf.ols(formula='pareto_closeness ~ naturalness', data=df).fit()

print(result.summary())
# ---- Beispiel #10 ----
import analysisutil
import matplotlib.pyplot as plt
import numpy as np

# Prepare a binned summary of average monotonicity over a grid of
# complexity x informativeness cells (the loop that fills the accumulators
# is truncated in this excerpt).
analysisutil.add_argument('complexity_strategy')
analysisutil.add_argument('informativeness_strategy')
analysisutil.add_argument('bins', type=int)

(args, setup, file_util) = analysisutil.init()

informativeness = file_util.load_dill('informativeness_{0}.dill'.format(
    args.informativeness_strategy))
complexity = file_util.load_dill('complexity_{0}.dill'.format(
    args.complexity_strategy))
monotonicity = file_util.load_dill('monotonicity.dill')

# Cell sizes for an args.bins x args.bins grid spanning [0, max].
max_comp = max(complexity)
max_inf = max(informativeness)
comp_step = max_comp / args.bins
inf_step = max_inf / args.bins

# Accumulators filled by the (truncated) binning loop that follows.
plot_complexity = []
plot_informativeness = []
plot_avg_monotonicity = []

for comp_start in np.arange(0, max_comp, comp_step):
    comp_end = comp_start + comp_step
    for inf_start in np.arange(0, max_inf, inf_step):
        monotonicities = []
        inf_end = inf_start + inf_step
        for (i, (inf, comp, mono)) in enumerate(
import random
from collections import namedtuple

from pathos.multiprocessing import ProcessPool

import Generator
import analysisutil
from Languages.ComplexityMeasurer import WordCountComplexityMeasurer
from Languages.InformativenessMeasurer import InformativenessMeasurer, SimMaxInformativenessMeasurer
from Languages.LanguageGenerator import generate_all, generate_sampled, EvaluatedExpression

# Build a pool of "fake" expressions whose meanings are uniformly random
# truth-vectors over the model universe, then form languages from them
# (the measurement code continues beyond this excerpt).
analysisutil.add_argument('max_words', type=int)
analysisutil.add_argument('--sample', type=int)
(args, setup, file_util) = analysisutil.init()

languages = []

# NOTE(review): args.model_size is not registered in this script —
# presumably supplied by analysisutil's shared arguments; verify.
universe = Generator.generate_simplified_models(args.model_size)

# Stand-in for a real evaluated expression: only .meaning is used here.
FakeEvaluatedExpression = namedtuple('FakeEvaluatedExpression', 'meaning')

# 10000 expressions, each assigned a random truth value per model.
expressions = [FakeEvaluatedExpression(tuple([random.choice([True, False]) for model in universe]))
               for i in range(10000)]

if args.sample is None:
    # NOTE(review): args.fixedwordcount is never registered in this script —
    # this branch would fail unless analysisutil defines it; confirm.
    languages = generate_all(expressions, args.max_words, args.fixedwordcount)
else:
    languages = generate_sampled(expressions, args.max_words, args.sample)

complexity_measurer = WordCountComplexityMeasurer(args.max_words)
informativeness_measurer_exact = InformativenessMeasurer(len(universe))
# ---- Beispiel #12 ----
import analysisutil
import matplotlib.pyplot as plt

# Scatter plot of monotonicity (x) against complexity (y) for every
# language under one complexity strategy.
analysisutil.add_argument('complexity_strategy')

args, setup, file_util = analysisutil.init()

complexity = file_util.load_dill(
    'complexity_{0}.dill'.format(args.complexity_strategy))
monotonicity = file_util.load_dill('monotonicity.dill')

fig = plt.figure()
plt.scatter(monotonicity, complexity)

plt.ylabel('complexity')
plt.xlabel('monotonicity')

plt.show()

file_util.save_figure(
    fig, '{0}_plot_monotonicity'.format(args.complexity_strategy))
from pathos.multiprocessing import ProcessPool
import analysisutil
from Languages import LanguageLoader
import pandas as pd
import numpy as np
from numpy.linalg import norm
import pygmo
import plotnine as pn

# Compare sampled runs against a Pareto-frontier run (the analysis
# continues beyond this excerpt).
analysisutil.add_argument('table_name')
analysisutil.add_argument('pareto')
analysisutil.add_argument('run_names', nargs='*')
(args, setup, file_util) = analysisutil.init(use_base_dir=True)

# Frontier table uses fixed strategies; no monotonicity column needed.
pareto_data = LanguageLoader.load_pandas_table(file_util.get_sub_file_util(
    args.pareto),
                                               'wordcomplexity',
                                               'simmax',
                                               include_monotonicity=False)

# Empty frame establishing the combined-table schema.
run_df = pd.DataFrame({
    'complexity': [],
    'comm_cost': [],
    'run': [],
    'monotonicity': []
})
# Per-run tables keyed by run name.
run_dfs = {}
for run_name in args.run_names:
    df = LanguageLoader.load_pandas_table(
        file_util.get_sub_file_util(run_name), 'wordcomplexity', 'simmax')
    run_dfs[run_name] = df
# ---- Beispiel #14 ----
import analysisutil

# Record the indices of all expressions no longer than the given length.
analysisutil.add_argument('length', type=int)
args, setup, file_util = analysisutil.init(use_base_dir=True)

expressions = file_util.load_dill('expressions.dill')

indices = [
    index for (index, expression) in enumerate(expressions)
    if expression.length() <= args.length
]

file_util.dump_dill(indices,
                    'upto{0}_expression_indices.dill'.format(args.length))
# ---- Beispiel #15 ----
from pathos.pools import ProcessPool
import Generator
import analysisutil
from Languages import LanguageLoader
from Languages.InformativenessMeasurer import SimMaxInformativenessMeasurer, InformativenessMeasurer

# Score every generated language with the informativeness strategy chosen
# on the command line, in parallel, then persist the scores.
analysisutil.add_argument('inf_strat')
args, setup, file_util = analysisutil.init()

languages = LanguageLoader.load_languages(file_util)

universe = Generator.generate_simplified_models(args.model_size)

# Dispatch table: strategy name -> measurer factory (lazy, so only the
# requested measurer is ever constructed).
measurer_factories = {
    'exact': lambda: InformativenessMeasurer(len(universe)),
    'simmax': lambda: SimMaxInformativenessMeasurer(universe),
}
if args.inf_strat not in measurer_factories:
    raise ValueError('{0} is not a valid informativeness strategy.'.format(
        args.inf_strat))
informativeness_measurer = measurer_factories[args.inf_strat]()

# Fan the per-language measurements out over a process pool.
with ProcessPool(nodes=args.processes) as pool:
    informativeness = pool.map(informativeness_measurer, languages)

file_util.dump_dill(informativeness,
                    'informativeness_{0}.dill'.format(args.inf_strat))
# ---- Beispiel #16 ----
import statsmodels

import analysisutil
import statsmodels.formula.api as smf
import plotnine as pn

# Compare a natural-language run against a random baseline: load the
# combined table and keep only rows belonging to the two runs of interest.
analysisutil.add_argument('table_name')
analysisutil.add_argument('natural_run')
analysisutil.add_argument('random_run')

args, setup, file_util = analysisutil.init(use_base_dir=True)

df = file_util.load_pandas_csv("pandas_{0}.csv".format(args.table_name))

selected_runs = [args.natural_run, args.random_run]
df = df[df.apply(lambda row: row.run in selected_runs, axis=1)]


def standardize(series):
    """Return *series* shifted to zero mean and scaled to unit (sample) std."""
    centered = series - series.mean()
    return centered / series.std()


# Put the two predictors on a common scale so coefficients are comparable.
df['conservativity'] = standardize(df['conservativity'])
df['monotonicity'] = standardize(df['monotonicity'])

# Binary indicator: 1 for the natural-language run, 0 for the random run.
df['natural'] = [
    1 if run_name == args.natural_run else 0 for run_name in df['run'].values
]
df['natural'] = df['natural'].astype('category')

print(df.head())
# ---- Beispiel #17 ----
import analysisutil
import matplotlib.pyplot as plt

# Histogram of the per-expression max-monotonicity scores, optionally
# restricted to the intersection of one or more stored index sets.
analysisutil.add_argument('-i', '--indices', nargs='*')
(args, setup, file_util) = analysisutil.init(use_base_dir=True)

monotonicities = file_util.load_dill('monotonicities_max.dill')

# Truthiness check (not `is not None`): `-i` with no values yields [], and
# set.intersection(*[]) would raise TypeError; an empty selection now simply
# means "no filtering".
if args.indices:
    # Each named file holds a set of expression indices; keep only the
    # expressions present in every requested set.
    index_sets = []
    for indices_name in args.indices:
        index_sets.append(
            set(
                file_util.load_dill(
                    '{0}_expression_indices.dill'.format(indices_name))))
    indices = set.intersection(*index_sets)
    monotonicities = [monotonicities[i] for i in indices]

fig = plt.figure()

plt.hist(monotonicities, bins=30)

plt.show()
# Join the index-set names with '-' (matching the multirun plot script) so
# the saved filename is filesystem-friendly instead of a stringified list.
file_util.save_figure(
    fig, 'monotonicity_hist{0}'.format(
        '_{0}'.format('-'.join(args.indices)) if args.indices else ''))
import random

import analysisutil
from Languages.LanguageGenerator import random_combinations

# Build mixed languages: for each size 1..max_words, draw `sample` languages
# whose members are a random split between natural and non-natural
# expressions; record each language's fraction of natural expressions.
analysisutil.add_argument('indices')
analysisutil.add_argument('max_words', type=int)
analysisutil.add_argument('sample', type=int)
(args, setup, file_util_out) = analysisutil.init()
file_util_in = file_util_out.get_base_file_util()

natural_indices = set(
    file_util_in.load_dill('{0}_expression_indices.dill'.format(args.indices)))
expressions = file_util_in.load_dill('expressions.dill')
non_natural_indices = set(range(len(expressions))) - natural_indices

language_indices = []
naturalness = []
sizes = []  # NOTE(review): never filled or saved — possibly vestigial.

for lang_size in range(1, args.max_words + 1):
    for _ in range(args.sample):
        # Randomly split the language between natural and non-natural parts.
        # (Call order matters for reproducibility under a fixed RNG seed.)
        len_natural = random.randint(0, lang_size)
        len_random = lang_size - len_natural
        lang_random = next(
            random_combinations(non_natural_indices, len_random, 1))
        lang_natural = next(
            random_combinations(natural_indices, len_natural, 1))
        naturalness.append(len_natural / lang_size)
        language_indices.append(lang_random + lang_natural)

file_util_out.dump_dill(language_indices, 'language_indices.dill')
file_util_out.dump_dill(naturalness, 'naturalness.dill')
file_util_out.save_stringlist(
    [[str(expressions[i]) for i in lang] for lang in language_indices],
    'languages.txt')