def test__version3():
    # Round-trip test: write the same tables in two on-disk layouts
    # (default random-access vs. singleSchema/no-random-access) and
    # verify both parse back to identical data.
    colnames = ['ID', 'separation']
    coltypes = ['int', 'float']
    t1 = datalib.Table('1', colnames, coltypes)
    t2 = datalib.Table('2', colnames, coltypes)
    t3 = datalib.Table('3', colnames, coltypes)  # intentionally left empty

    def __addrow(table, id, separation):
        # Append one (ID, separation) row to table.
        row = table.createRow()
        row['ID'] = id
        row['separation'] = separation

    __addrow(t1, '2', .02)
    __addrow(t1, '3', .03)
    __addrow(t2, '3', .04)

    datalib.write( 'version3__table_fixed.plt', [t1,t2,t3] )
    datalib.write( 'version3__single_none.plt', [t1,t2,t3], randomAccess=False, singleSchema=True)

    # Re-parse both files; tablename2key=int maps table names '1','2','3'
    # to integer keys, and rows are keyed by their 'ID' column.
    tables1 = datalib.parse( 'version3__table_fixed.plt', keycolname = 'ID', tablename2key = int )
    tables2 = datalib.parse( 'version3__single_none.plt', keycolname = 'ID', tablename2key = int )

    # Both layouts must yield the same values.
    assert( tables1[1][2]['separation'] == .02 )
    assert( tables1[1][3]['separation'] == .03 )
    assert( tables1[2][3]['separation'] == .04 )

    assert( tables2[1][2]['separation'] == .02 )
    assert( tables2[1][3]['separation'] == .03 )
    assert( tables2[2][3]['separation'] == .04 )
def retrieve_data(run_dir, x_data_type, y_data_type): global use_hf_coloration recent_dir = os.path.join(run_dir, 'brain/Recent') x_filename = get_filename(x_data_type) x_path = os.path.join(recent_dir, x_filename) y_filename = get_filename(y_data_type) y_path = os.path.join(recent_dir, y_filename) if not (os.path.exists(x_path) and os.path.exists(y_path)): print 'Error! Needed Avr files are missing' exit(2) x_table = datalib.parse(x_path)[x_data_type] y_table = datalib.parse(y_path)[y_data_type] x_data = x_table.getColumn('mean').data y_data = y_table.getColumn('mean').data if use_hf_coloration: got_hf_data = False try: hf_table = datalib.parse(y_path)['hf'] got_hf_data = True except KeyError: try: hf_table = datalib.parse(x_path)['hf'] except KeyError: # must resort to time data because hf isn't available use_hf_coloration = False pass if got_hf_data: t_data = hf_table.getColumn('mean').data else: t_data = x_table.getColumn('Timestep').data # could come from either x or y else: t_data = x_table.getColumn('Timestep').data # could come from either x or y return x_data, y_data, t_data
def retrieve_data(timestep_dir, x_data_type, y_data_type, sim_type, time, max_time):
    """Return (x_data, y_data, c_data) for one timestep directory.

    x/y series come from retrieve_data_type; c_data is a per-point color
    list, built from the 'hf' table when hf coloration is active and
    available, otherwise from the normalized time.
    """
    global use_hf_coloration
    x_path, x_data, x_agents = retrieve_data_type(timestep_dir, x_data_type, sim_type, time, max_time, LimitX)
    y_path, y_data, y_agents = retrieve_data_type(timestep_dir, y_data_type, sim_type, time, max_time, LimitY)
    if not x_data or not y_data:
        return None, None, None
    # Deal with fact that the "time" data type doesn't have its own path or list of agents
    if not x_path:
        x_path = y_path
        x_agents = y_agents
    elif not y_path:
        y_path = x_path
        y_agents = x_agents
    # Get rid of extra agents to make len(x_data) == len(y_data)
    if len(x_agents) != len(y_agents):
        excluded_agents = list_difference(x_agents, y_agents)
        if len(x_agents) > len(y_agents):
            # x has extras: drop agents (and their data) not present in y.
            for agent in excluded_agents:
                agent_idx = x_agents.index(agent)
                x_agents.pop(agent_idx)
                x_data.pop(agent_idx)
        else:
            for agent in excluded_agents:
                agent_idx = y_agents.index(agent)
                y_agents.pop(agent_idx)
                y_data.pop(agent_idx)
    got_hf_data = False
    if use_hf_coloration:
        # Prefer hf data from the y file, fall back to the x file.
        try:
            hf_table = datalib.parse(y_path)['hf']
            got_hf_data = True
        except KeyError:
            try:
                hf_table = datalib.parse(x_path)['hf']
                got_hf_data = True
            except KeyError:
                pass
    if got_hf_data:
        hf_data = hf_table.getColumn('Mean').data
        c_data = map(lambda x: get_hf_color(x), hf_data)
    else:
        # One identical color per point, derived from normalized time.
        c_data = map(
            lambda x: get_time_color(float(time) / float(max_time), sim_type),
            range(len(x_data)))
    return x_data, y_data, c_data
def retrieve_data(timestep_dir, x_data_type, y_data_type, sim_type, time, max_time, opacity):
    """Return (x_data, y_data, c_data) for one timestep directory.

    Unlike the time-based variant, each retrieve_data_type call receives
    the other axis' data type as a template for types with no datalib
    file of their own; opacity is threaded into the color functions.
    """
    global use_hf_coloration
    x_path, x_data, x_agents = retrieve_data_type(timestep_dir, x_data_type, sim_type, LimitX, y_data_type)
    y_path, y_data, y_agents = retrieve_data_type(timestep_dir, y_data_type, sim_type, LimitY, x_data_type)
    if not x_data or not y_data:
        return None, None, None
    # Deal with fact that some data types don't have a datalib path or list of agents
    if not x_path:
        x_path = y_path
    elif not y_path:
        y_path = x_path
    if not x_agents:
        x_agents = y_agents
    if not y_agents:
        y_agents = x_agents
    # Get rid of any agents (and corresponding data points) not in both data sets
    excluded_agents = list_difference(x_agents, y_agents)  # agents in x, but not in y
    if excluded_agents:
        for agent in excluded_agents:
            agent_idx = x_agents.index(agent)
            x_agents.pop(agent_idx)
            x_data.pop(agent_idx)
    excluded_agents = list_difference(y_agents, x_agents)  # agents in y, but not in x
    if excluded_agents:
        for agent in excluded_agents:
            agent_idx = y_agents.index(agent)
            y_agents.pop(agent_idx)
            y_data.pop(agent_idx)
    got_hf_data = False
    if use_hf_coloration:
        # Prefer hf data from the y file, fall back to the x file.
        try:
            hf_table = datalib.parse(y_path)['hf']
            got_hf_data = True
        except KeyError:
            try:
                hf_table = datalib.parse(x_path)['hf']
                got_hf_data = True
            except KeyError:
                pass
    if got_hf_data:
        hf_data = hf_table.getColumn('Mean').data
        c_data = map(lambda x: get_hf_color(x, opacity), hf_data)
    else:
        if use_color:
            # One identical color per point, derived from normalized time.
            c_data = map(lambda x: get_time_color(float(time)/float(max_time), sim_type, opacity), range(len(x_data)))
        else:
            # Grayscale fallback: constant 0.5 per point.
            c_data = map(lambda x: 0.5, range(len(x_data)))
    return x_data, y_data, c_data
def retrieve_data(timestep_dir, x_data_type, y_data_type, sim_type, time, max_time):
    """Return (x_data, y_data, c_data) for one timestep directory.

    Near-duplicate of the other time/max_time variant in this codebase;
    colors come from the 'hf' table when available, else normalized time.
    """
    global use_hf_coloration
    x_path, x_data, x_agents = retrieve_data_type(timestep_dir, x_data_type, sim_type, time, max_time, LimitX)
    y_path, y_data, y_agents = retrieve_data_type(timestep_dir, y_data_type, sim_type, time, max_time, LimitY)
    if not x_data or not y_data:
        return None, None, None
    # Deal with fact that the "time" data type doesn't have its own path or list of agents
    if not x_path:
        x_path = y_path
        x_agents = y_agents
    elif not y_path:
        y_path = x_path
        y_agents = x_agents
    # Get rid of extra agents to make len(x_data) == len(y_data)
    if len(x_agents) != len(y_agents):
        excluded_agents = list_difference(x_agents,y_agents)
        if len(x_agents) > len(y_agents):
            # x has extras: drop agents (and their data) not present in y.
            for agent in excluded_agents:
                agent_idx = x_agents.index(agent)
                x_agents.pop(agent_idx)
                x_data.pop(agent_idx)
        else:
            for agent in excluded_agents:
                agent_idx = y_agents.index(agent)
                y_agents.pop(agent_idx)
                y_data.pop(agent_idx)
    got_hf_data = False
    if use_hf_coloration:
        # Prefer hf data from the y file, fall back to the x file.
        try:
            hf_table = datalib.parse(y_path)['hf']
            got_hf_data = True
        except KeyError:
            try:
                hf_table = datalib.parse(x_path)['hf']
                got_hf_data = True
            except KeyError:
                pass
    if got_hf_data:
        hf_data = hf_table.getColumn('Mean').data
        c_data = map(lambda x: get_hf_color(x), hf_data)
    else:
        # One identical color per point, derived from normalized time.
        c_data = map(lambda x: get_time_color(float(time)/float(max_time), sim_type), range(len(x_data)))
    return x_data, y_data, c_data
def main(): argv = sys.argv[1:] if not len(argv): usage() path_in = argv[0] path_out = argv[1] tablename = argv[2] clauses = parse_clauses( argv[3:] ) table = datalib.parse( path_in, [tablename] )[tablename] for args in clauses: mode = args[0] if mode == 'sort': table = dosort( table, args[1:] ) elif mode == 'rowfilter': table = dorowfilter( table, args[1:] ) else: print 'invalid mode:', mode sys.exit( 1 ) datalib.write( path_out, [table] )
def retrieve_data_type(timestep_dir, data_type, sim_type, time, max_time, limit):
    """Return (path, data, agents) for one data type at this timestep.

    The synthetic 'time' type has no backing .plt file, so it returns
    (None, time_data, None). Values above `limit` are clamped to it.
    """
    if data_type == 'time':
        time_data = retrieve_time_data(timestep_dir, sim_type, time, max_time, limit)
        return None, time_data, None
    filename = get_filename(data_type)
    path = os.path.join(timestep_dir, filename)
    if not os.path.exists(path):
        print 'Warning:', path, '.plt file is missing:'
        return None, None, None
    table = datalib.parse(path)[data_type]
    try:
        data = table.getColumn('Mean').data  # new-format column name
        if limit:
            # Clamp any value exceeding the limit down to the limit.
            for i in range(len(data)):
                if data[i] > limit:
                    data[i] = limit
    except KeyError:
        data = table.getColumn('Complexity').data  # old-format column name
    try:
        agents = table.getColumn('AgentNumber').data  # new-format column name
    except KeyError:
        agents = table.getColumn('CritterNumber').data  # old-format column name
    return path, data, agents
def parse_avr( run_path, recent_type = "Recent", metrics = None ):
    """Parse a run's AVR datalib; metrics=None loads every table."""
    avr_path = path_avr( run_path, recent_type )
    return datalib.parse( avr_path,
                          tablenames = metrics,
                          required = datalib.REQUIRED,
                          keycolname = 'Timestep' )
def parse_avr(run_path, recent_type='Recent', complexities=None):
    """Parse the AVR tables for a run; complexities=None loads all tables."""
    source = path_avr(run_path, recent_type)
    return datalib.parse(source,
                         tablenames=complexities,
                         required=datalib.REQUIRED,
                         keycolname='Timestep')
def retrieve_data_type(timestep_dir, data_type, run_type, limit, other_data_type, death_times):
    """Return (path, data, agents) for one data type at this timestep.

    Types in OTHER_DATA_TYPES have no datalib file of their own; they
    recurse with other_data_type to obtain the agent list as a template.
    """
    if data_type in OTHER_DATA_TYPES:
        # for data not found in a datalib.plt file
        # use other_data_type as a template
        path, data, agents = retrieve_data_type(timestep_dir, other_data_type, run_type, limit, None, death_times)
        if not agents:
            return None, None, None
        if data_type == "time":
            time_data = retrieve_time_data(agents, death_times)
            return None, time_data, None
    filename = get_filename(data_type)
    path = os.path.join(timestep_dir, filename)
    if not os.path.exists(path):
        print "Warning:", path, ".plt file is missing:"
        return None, None, None
    table = datalib.parse(path)[data_type]
    try:
        data = table.getColumn("Mean").data  # new-format column name
        # Clamping deliberately disabled here (unlike sibling variants):
        # if limit:
        #     for i in range(len(data)):
        #         if data[i] > limit:
        #             data[i] = 0.0
    except KeyError:
        data = table.getColumn("Complexity").data  # old-format column name
    try:
        agents = table.getColumn("AgentNumber").data  # new-format column name
    except KeyError:
        agents = table.getColumn("CritterNumber").data  # old-format column name
    return path, data, agents
def retrieve_timesteps(dir):
    """Return the Timestep column from dir's Avr file."""
    parsed = datalib.parse(get_avr_file(dir))
    # all tables should have Timesteps and the same number of them,
    # so any table serves equally well.
    first_table = parsed[parsed.keys()[0]]
    return first_table.getColumn('Timestep').data
def print_range(dirs, type, metrics):
    """Print min/max/mean of each metric, aggregated across all dirs.

    min/max are taken over every directory; mean is the average of the
    per-directory means.
    """
    if not dirs or not metrics:
        return
    minv = {}
    maxv = {}
    mean = {}
    # Seed the accumulators per metric.
    for metric in metrics:
        minv[metric] = 1000000.0  # sentinel upper bound for the running min
        maxv[metric] = 0.0
        mean[metric] = 0.0
    for dir in dirs:
        data = {}
        avr_file = get_avr_file(dir)
        tables = datalib.parse(avr_file)
        for metric in metrics:
            metric_table = tables[metric]
            metric_min = metric_table.getColumn('min').data
            metric_max = metric_table.getColumn('max').data
            metric_mean = metric_table.getColumn('mean').data
            minv[metric] = min(minv[metric], float(min(metric_min)))
            maxv[metric] = max(maxv[metric], max(metric_max))
            # Accumulate this directory's average; divided by len(dirs) below.
            mean[metric] += sum(metric_mean) / len(metric_mean)
    for metric in metrics:
        mean[metric] /= len(dirs)
    print '%s:' % (type)
    print '  metric         min     max    mean'
    for metric in metrics:
        print '  %s:  %6g\t%6g\t%6g' % (metric, minv[metric], maxv[metric], mean[metric])
def parse_avr(run_path, recent_type = 'Recent', complexities = None):
    """Parse the run's AVR file; complexities=None selects every table."""
    avr = path_avr(run_path, recent_type)
    return datalib.parse(avr,
                         tablenames = complexities,
                         required = datalib.REQUIRED,
                         keycolname = 'Timestep')
def __get_stats( path_run, quiet = False ):
    """Load the run's stats datalib, building it from stat.* files if absent.

    The converted datalib is cached on disk so later calls parse it directly.
    """
    path_datalib = os.path.join( path_run, 'stats', FILENAME_DATALIB )
    if os.path.exists( path_datalib ):
        # Cached conversion exists; parse and return it.
        return datalib.parse( path_datalib, keycolname = 'step' )
    if not quiet:
        # This can take a long time, so let the user know we're not hung
        print 'Converting stats files into datalib format for run', path_run
    tables = __create_tables( path_run, quiet )
    paths = glob.glob( os.path.join(path_run, 'stats', 'stat.*') )
    # Process stat files in ascending step order.
    paths.sort( lambda x, y: __path2step(x) - __path2step(y) )
    for path in paths:
        __add_row( tables, path, quiet )
    if not quiet:
        print '\nwriting %s' % path_datalib
    datalib.write( path_datalib, tables, randomAccess = False )
    return tables
def load(contact_filename, cluster, filter=None): '''Loads contact information from the given filename, and determines inter and intra-cluster contacts from the given set of clusters''' # initialize filter if not set if filter is None: filter = lambda row: True contact_table = datalib.parse(contact_filename)['Contacts'] intra = defaultdict(int) inter = defaultdict(int) total = 0 for row in contact_table: total += 1 if filter(row): if cluster[row['Agent1']] == cluster[row['Agent2']]: intra[row['Agent1']] += 1 intra[row['Agent2']] += 1 else: inter[row['Agent1']] += 1 inter[row['Agent2']] += 1 if False and __debug__ and (total % 1000) == 0: print 'contacts[%d] = %d, intra = %d, inter = %d' %\ (row['Agent1'], intra[row['Agent1']] + inter[row['Agent1']], intra[row['Agent1']], inter[row['Agent1']]) print 'contacts[%d] = %d, intra = %d, inter = %d' %\ (row['Agent2'], intra[row['Agent2']] + inter[row['Agent2']], intra[row['Agent2']], inter[row['Agent2']]) return (intra, inter)
def test(): #for x in __parse_file( '../run_tau60k_from18k_ws200/stats/stat.1' ): # print x for tablename, table in datalib.parse( '../run_tau60k_from18k_ws200/stats/datalib.txt' ).items(): print '---', tablename, table.name, '---' for row in table.rows(): print row['value']
def test__misc():
    # Smoke test: build keyed tables two ways, iterate them with
    # MatrixIterator, and round-trip one through datalib.write/parse.
    print '-----------------------------------'
    COLNAMES = ['T', 'A', 'B', 'C']
    COLTYPES = ['int', 'int', 'int', 'int']
    t = datalib.Table('test', COLNAMES, COLTYPES, keycolname = 'T')

    # Rows can be populated via set() or item assignment interchangeably.
    row = t.createRow()
    row.set('T', 0)
    row['A'] = 10
    row.set('B', 11)
    row.set('C', 12)

    row = t.createRow()
    row['T'] = 1
    row['A'] = 20
    row['B'] = 21
    row['C'] = 22

    # Keyed access: t[key] looks rows up by the 'T' column.
    print t[0]['A']
    print t[1]['A']

    it = iterators.MatrixIterator(t, range(2), ['A','B'])
    for a in it:
        print a

    datalib.write('/tmp/datalib1', t)

    print '-----------------------------------'

    table = datalib.Table(name='Example 2',
                          colnames=['Time','A','B'],
                          coltypes=['int','float','float'],
                          keycolname='Time')
    row = table.createRow()
    row['Time'] = 1
    row['A'] = 100.0
    row['B'] = 101.0
    row = table.createRow()
    row['Time'] = 10001
    row['A'] = 200.0
    row['B'] = 201.0

    # Iterate only keys 1 and 10001 (step 10000), column 'B'.
    it = iterators.MatrixIterator(table, range(1,10002,10000), ['B'])
    for a in it:
        print a

    datalib.write('/tmp/datalib2', table)

    # Round-trip: parse back with the same key column and verify access.
    tables = datalib.parse('/tmp/datalib2', keycolname = 'Time')
    table = tables['Example 2']
    print 'key=',table.keycolname
    print tables['Example 2'][1]['A']
def get_cc_len(dir, neuron_set, graph_type, length_type):
    """Return (cc, length): mean clustering-coefficient and mean path-length
    series from dir's Avr file for the given neuron set and graph type.

    Table names follow the '<metric>_<neuron_set>_<graph_type>' convention.
    """
    avr_file = get_avr_file(dir)
    tables = datalib.parse(avr_file)
    cc_table = tables['cc_'+neuron_set+'_'+graph_type]
    len_table = tables[length_type+'_'+neuron_set+'_'+graph_type]
    cc = cc_table.getColumn('mean').data
    # Renamed local from 'len' to avoid shadowing the builtin len().
    length = len_table.getColumn('mean').data
    return cc, length
def test(): #for x in __parse_file( '../run_tau60k_from18k_ws200/stats/stat.1' ): # print x for tablename, table in datalib.parse( '../run_tau60k_from18k_ws200/stats/datalib.txt').items(): print '---', tablename, table.name, '---' for row in table.rows(): print row['value']
def __init__( self, path_run ):
    # Stream-parse genome/separations.txt into nested dicts:
    # self.tables[agentNumber][otherAgent] -> separation value.
    path_log = os.path.join(path_run, 'genome/separations.txt')

    # Mutable holder shared by the two parser callbacks below.
    class state:
        tables = {}

    def __beginTable( tablename, colnames, coltypes, path, table_index, keycolname ):
        # Each table is named by the agent number it belongs to.
        agentNumber = int( tablename )
        state.currTable = {}
        state.tables[ agentNumber ] = state.currTable

    def __row( row ):
        # Record the separation from the current agent to row['Agent'].
        state.currTable[ row['Agent'] ] = row[ 'Separation' ]

    datalib.parse( path_log,
                   stream_beginTable = __beginTable,
                   stream_row = __row )

    self.tables = state.tables
def positions(self):
    ''' Lazy loading of position data.

    Returns a dict mapping each Timestep to its (x, y, z) tuple.
    '''
    position_table = datalib.parse(self.filename, keycolname='Timestep')['Positions']
    # Build the dict in one pass instead of per-row dict.update() calls.
    return dict((row['Timestep'], (row['x'], row['y'], row['z']))
                for row in position_table.rowlist)
def copy_metrics(source_file, target_file): global test, overwrite, metrics_to_copy if test: print ' copying metrics (%s) from' % (','.join(metrics_to_copy)), source_file, 'to', target_file else: tables_to_copy = {} source_tables = datalib.parse(source_file) for metric in metrics_to_copy: tables_to_copy[metric] = source_tables[metric] datalib.write(target_file, tables_to_copy, append=True, replace=overwrite)
def positions(self):
    ''' Lazy loading of position data.

    Returns a dict mapping each Timestep to its (x, y, z) tuple.
    '''
    position_table = datalib.parse(self.filename, keycolname='Timestep')['Positions']
    # Build the dict in one pass instead of per-row dict.update() calls.
    return dict((row['Timestep'], (row['x'], row['y'], row['z']))
                for row in position_table.rowlist)
def __init__(self, path_run):
    # Stream-parse genome/separations.txt into nested dicts:
    # self.tables[agentNumber][otherAgent] -> separation value.
    path_log = os.path.join(path_run, 'genome/separations.txt')

    # Mutable holder shared by the two parser callbacks below.
    class state:
        tables = {}

    def __beginTable(tablename, colnames, coltypes, path, table_index, keycolname):
        # Each table is named by the agent number it belongs to.
        agentNumber = int(tablename)
        state.currTable = {}
        state.tables[agentNumber] = state.currTable

    def __row(row):
        # Record the separation from the current agent to row['Agent'].
        state.currTable[row['Agent']] = row['Separation']

    datalib.parse(path_log,
                  stream_beginTable=__beginTable,
                  stream_row=__row)

    self.tables = state.tables
def _get_complexity(self):
    # Return this agent's 'P' complexity, or 0.0 for non-natural deaths.
    if self.death_reason == 'NATURAL':
        # TODO: LazyDict
        # Memoize the parsed complexity table per filename in the
        # module-level `complexity` cache.
        if self.complexity_filename not in complexity:
            complexity[self.complexity_filename] =\
                datalib.parse(self.complexity_filename,
                              keycolname='CritterNumber')['P']
        complexity_table = complexity[self.complexity_filename]
        return complexity_table[self.id]['Complexity']
    else:
        return 0.0
def test__stream():
    # Exercise datalib's streaming parse: write two tables, then re-read
    # them with a per-row callback instead of materializing tables.
    colnames = ['ID', 'separation']
    coltypes = ['int', 'float']
    t1 = datalib.Table('1', colnames, coltypes)
    t2 = datalib.Table('2', colnames, coltypes)

    def __addrow(table, id, separation):
        # Append one (ID, separation) row to table.
        row = table.createRow()
        row['ID'] = id
        row['separation'] = separation

    __addrow(t1, '1', .01)
    __addrow(t1, '2', .02)

    __addrow(t2, '11', .11)
    __addrow(t2, '12', .12)

    datalib.write( 'stream.plt', [t1,t2] )

    def stream_row( row ):
        # Invoked once per parsed row, across both tables.
        print row['ID'], row['separation']

    datalib.parse( 'stream.plt', stream_row = stream_row )
def retrieve_data(run_dir, x_data_type, y_data_type): global use_hf_coloration recent_dir = os.path.join(run_dir, 'brain/Recent') x_filename = get_filename(x_data_type) x_path = os.path.join(recent_dir, x_filename) y_filename = get_filename(y_data_type) y_path = os.path.join(recent_dir, y_filename) if not (os.path.exists(x_path) and os.path.exists(y_path)): print 'Error! Needed Avr files are missing' exit(2) x_table = datalib.parse(x_path)[x_data_type] y_table = datalib.parse(y_path)[y_data_type] x_data = x_table.getColumn('mean').data y_data = y_table.getColumn('mean').data if use_hf_coloration: got_hf_data = False try: hf_table = datalib.parse(y_path)['hf'] got_hf_data = True except KeyError: try: hf_table = datalib.parse(x_path)['hf'] except KeyError: # must resort to time data because hf isn't available use_hf_coloration = False pass if got_hf_data: t_data = hf_table.getColumn('mean').data else: t_data = x_table.getColumn( 'Timestep').data # could come from either x or y else: t_data = x_table.getColumn( 'Timestep').data # could come from either x or y return x_data, y_data, t_data
def retrieve_data_type(timestep_dir, data_type, sim_type, limit, other_data_type):
    """Return (path, data, agents) for one data type at this timestep.

    Types in OTHER_DATA_TYPES (time/neuronCount/synapseCount) have no
    datalib file; they recurse with other_data_type to get an agent list.
    """
    if data_type in OTHER_DATA_TYPES:
        # for data not found in a datalib.plt file
        # use P complexity as a template
        path, data, agents = retrieve_data_type(timestep_dir, other_data_type, sim_type, limit, None)
        if not agents:
            return None, None, None
        if data_type == 'time':
            time_data = retrieve_time_data(agents)
            return None, time_data, None
        if data_type == 'neuronCount':
            neuron_data = retrieve_neuron_data(timestep_dir, agents)
            return None, neuron_data, agents
        if data_type == 'synapseCount':
            synapse_data = retrieve_synapse_data(timestep_dir, agents)
            return None, synapse_data, agents
    filename = get_filename(data_type)
    path = os.path.join(timestep_dir, filename)
    if not os.path.exists(path):
        print 'Warning:', path, '.plt file is missing:'
        return None, None, None
    table = datalib.parse(path)[data_type]
    try:
        data = table.getColumn( 'Mean').data  # new-format column name
        # Clamping deliberately disabled in this variant:
        # if limit:
        #     for i in range(len(data)):
        #         if data[i] > limit:
        #             data[i] = 0.0
    except KeyError:
        data = table.getColumn( 'Complexity').data  # old-format column name
    try:
        agents = table.getColumn( 'AgentNumber' ).data  # new-format column name
    except KeyError:
        agents = table.getColumn( 'CritterNumber' ).data  # old-format column name
    return path, data, agents
def get_agent_ids_during_time(start, stop, run_dir='../run/'):
    ''' get all agent ids alive between start and stop '''
    lifespans_filename = "%s/lifespans.txt" % (run_dir)
    # Parse lifespans.txt once per file; memoized in the module-level cache.
    if lifespans_filename not in lifespans:
        parsed = datalib.parse(lifespans_filename, keycolname='Agent')
        lifespans[lifespans_filename] = parsed['LifeSpans']
    lifespan_table = lifespans[lifespans_filename]
    # Alive iff the agent's [BirthStep, DeathStep] overlaps [start, stop].
    return [row['Agent'] for row in lifespan_table.rows()
            if start <= row['DeathStep'] and row['BirthStep'] <= stop]
def get_agent_ids_until_time(time, run_dir='../run/'):
    ''' get all agent ids alive before timestep time. '''
    lifespans_filename = "%s/lifespans.txt" % (run_dir)
    # Parse lifespans.txt once per file; memoized in the module-level cache.
    if lifespans_filename not in lifespans:
        parsed = datalib.parse(lifespans_filename, keycolname='Agent')
        lifespans[lifespans_filename] = parsed['LifeSpans']
    lifespan_table = lifespans[lifespans_filename]
    # Any agent born before `time` qualifies (death step is not checked).
    return [row['Agent'] for row in lifespan_table.rows()
            if row['BirthStep'] < time]
def get_agent_ids_at_time(time, run_dir='../run/'):
    ''' get agent ids at timestep time. This is used to build an arg for
    get_agents. '''
    lifespans_filename = "%s/lifespans.txt" % (run_dir)
    # Parse lifespans.txt once per file; memoized in the module-level cache.
    if lifespans_filename not in lifespans:
        parsed = datalib.parse(lifespans_filename, keycolname='Agent')
        lifespans[lifespans_filename] = parsed['LifeSpans']
    lifespan_table = lifespans[lifespans_filename]
    # Alive at `time` iff born on/before it and not yet dead.
    return [row['Agent'] for row in lifespan_table.rows()
            if row['BirthStep'] <= time and row['DeathStep'] > time]
def __get_stats(path_run):
    """Load the run's stats datalib, building it from stat.* files if absent."""
    path_datalib = os.path.join(path_run, 'stats', FILENAME_DATALIB)
    if os.path.exists(path_datalib):
        # Cached conversion exists; parse and return it.
        return datalib.parse(path_datalib, keycolname='step')
    # No cached datalib yet: assemble tables from individual stat.N files,
    # processed in ascending step order.
    tables = __create_tables(path_run)
    stat_paths = glob.glob(os.path.join(path_run, 'stats', 'stat.*'))
    stat_paths.sort(lambda a, b: __path2step(a) - __path2step(b))
    for stat_path in stat_paths:
        __add_row(tables, stat_path)
    datalib.write(path_datalib, tables)
    return tables
def rename_metrics_in_file(file, metrics_to_rename): global test, verbose tables = datalib.parse(file) not_renamed = [] renamed = [] for table in tables.values(): if table.name in metrics_to_rename: renamed.append((table.name, metrics_to_rename[table.name])) table.name = metrics_to_rename[table.name] else: not_renamed.append(table.name) if renamed: if test: print 'renaming', renamed, 'in', file else: datalib.write(file, tables) if not_renamed and verbose: print 'not renaming', not_renamed, 'in', file
def rename_metrics_in_file(file, metrics_to_rename): global test tables = datalib.parse(file) not_renamed = [] renamed = [] for table in tables.values(): if table.name in metrics_to_rename: renamed.append((table.name, metrics_to_rename[table.name])) table.name = metrics_to_rename[table.name] else: not_renamed.append(table.name) if renamed: if test: print 'renaming', renamed, 'in', file else: datalib.write(file, tables) if test and not_renamed: print 'not renaming', not_renamed, 'in', file
def __get_stats( path_run ):
    """Load the run's stats datalib, converting stat.* files on first use."""
    path_datalib = os.path.join( path_run, 'stats', FILENAME_DATALIB )
    if os.path.exists( path_datalib ):
        # Already converted; just parse the cached datalib.
        return datalib.parse( path_datalib, keycolname = 'step' )
    # Build tables from each stat.N file, visited in step order.
    tables = __create_tables( path_run )
    stat_files = glob.glob( os.path.join(path_run, 'stats', 'stat.*') )
    stat_files.sort( lambda a, b: __path2step(a) - __path2step(b) )
    for stat_file in stat_files:
        __add_row( tables, stat_file )
    datalib.write( path_datalib, tables )
    return tables
def rename_metrics_in_file(file, metrics_to_rename): global test, verbose tables = datalib.parse(file) not_renamed = [] renamed = [] for table in tables.values(): if table.name in metrics_to_rename: renamed.append((table.name, metrics_to_rename[table.name])) table.name = metrics_to_rename[table.name] else: not_renamed.append(table.name) if renamed: if test: print "renaming", renamed, "in", file else: datalib.write(file, tables) if not_renamed and verbose: print "not renaming", not_renamed, "in", file
def compute_complexities(complexities, timestep_directory, tdata):
    # Reads previously-computed complexity .plt files into tdata;
    # types whose files are missing or corrupt are left for regeneration.
    def __path(type):
        # Path of the per-type complexity file in this timestep directory.
        return os.path.join(timestep_directory, 'complexity_' + type + '.plt')

    # --- Read in any complexities computed on a previous invocation of this script
    complexities_read = []
    for type in complexities:
        path = __path(type)
        if os.path.isfile(path):
            try:
                table = datalib.parse(path)[type]
                data = table.getColumn('Complexity').data
                tdata[type] = common_complexity.normalize_complexities(data)
                complexities_read.append(type)
            except datalib.InvalidFileError, e:
                # file must have been incomplete
                print "Failed reading ", path, "(", e, ") ... regenerating"
def compute_complexities(complexities, timestep, timestep_directory, tdata):
    # Reads previously-computed complexity .plt files into tdata, unless
    # OverwriteEpochComplexities forces regeneration of all of them.
    def __path(type):
        # Path of the per-type complexity file in this timestep directory.
        return os.path.join(timestep_directory, 'complexity_' + type + '.plt')

    # --- Read in any complexities computed on a previous invocation of this script
    complexities_read = []
    if not OverwriteEpochComplexities:
        for type in complexities:
            path = __path(type)
            if os.path.isfile(path):
                try:
                    table = datalib.parse(path)[type]
                    data = table.getColumn('Complexity').data
                    tdata[type] = common_complexity.normalize_complexities(
                        data)
                    complexities_read.append(type)
                except datalib.InvalidFileError, e:
                    # file must have been incomplete
                    print "Failed reading ", path, "(", e, ") ... regenerating"
def retrieve_data_type(timestep_dir, data_type, sim_type, limit, other_data_type):
    """Return (path, data, agents) for one data type at this timestep.

    Types in OTHER_DATA_TYPES (time/neuronCount/synapseCount) have no
    datalib file; they recurse with other_data_type for the agent list.
    Mean values above `limit` are clamped to it.
    """
    if data_type in OTHER_DATA_TYPES:
        # for data not found in a datalib.plt file
        # use P complexity as a template
        path, data, agents = retrieve_data_type(timestep_dir, other_data_type, sim_type, limit, None)
        if not agents:
            return None, None, None
        if data_type == 'time':
            time_data = retrieve_time_data(agents)
            return None, time_data, None
        if data_type == 'neuronCount':
            neuron_data = retrieve_neuron_data(timestep_dir, agents)
            return None, neuron_data, agents
        if data_type == 'synapseCount':
            synapse_data = retrieve_synapse_data(timestep_dir, agents)
            return None, synapse_data, agents
    filename = get_filename(data_type)
    path = os.path.join(timestep_dir, filename)
    if not os.path.exists(path):
        print 'Warning:', path, '.plt file is missing:'
        return None, None, None
    table = datalib.parse(path)[data_type]
    try:
        data = table.getColumn('Mean').data  # new-format column name
        if limit:
            # Clamp any value exceeding the limit down to the limit.
            for i in range(len(data)):
                if data[i] > limit:
                    data[i] = limit
    except KeyError:
        data = table.getColumn('Complexity').data  # old-format column name
    try:
        agents = table.getColumn('AgentNumber').data  # new-format column name
    except KeyError:
        agents = table.getColumn('CritterNumber').data  # old-format column name
    return path, data, agents
def __get_stats(path_run, quiet=False):
    """Load the run's stats datalib, building it from stat.* files if absent.

    The converted datalib is cached on disk so later calls parse it directly.
    """
    path_datalib = os.path.join(path_run, 'stats', FILENAME_DATALIB)
    if os.path.exists(path_datalib):
        # Cached conversion exists; parse and return it.
        return datalib.parse(path_datalib, keycolname='step')
    if not quiet:
        # This can take a long time, so let the user know we're not hung
        print 'Converting stats files into datalib format for run', path_run
    tables = __create_tables(path_run, quiet)
    paths = glob.glob(os.path.join(path_run, 'stats', 'stat.*'))
    # Process stat files in ascending step order.
    paths.sort(lambda x, y: __path2step(x) - __path2step(y))
    for path in paths:
        __add_row(tables, path, quiet)
    if not quiet:
        print '\nwriting %s' % path_datalib
    datalib.write(path_datalib, tables, randomAccess=False)
    return tables
def retrieve_data_type(timestep_dir, data_type, sim_type, time, max_time, limit):
    """Return (path, data, agents) for one data type at this timestep.

    The synthetic 'time' type has no backing .plt file, so it returns
    (None, time_data, None). Values above `limit` are clamped to it.
    """
    if data_type == 'time':
        time_data = retrieve_time_data(timestep_dir, sim_type, time, max_time, limit)
        return None, time_data, None
    filename = get_filename(data_type)
    path = os.path.join(timestep_dir, filename)
    if not os.path.exists(path):
        print 'Warning:', path, '.plt file is missing:'
        return None, None, None
    table = datalib.parse(path)[data_type]
    try:
        data = table.getColumn( 'Mean').data  # new-format column name
        if limit:
            # Clamp any value exceeding the limit down to the limit.
            for i in range(len(data)):
                if data[i] > limit:
                    data[i] = limit
    except KeyError:
        data = table.getColumn( 'Complexity').data  # old-format column name
    try:
        agents = table.getColumn( 'AgentNumber' ).data  # new-format column name
    except KeyError:
        agents = table.getColumn( 'CritterNumber' ).data  # old-format column name
    return path, data, agents
def move_metrics_in_file(source_file, target_file, metrics_to_move):
    """Move (rename + relocate) metric tables from source_file to target_file.

    metrics_to_move maps old table name -> new table name. The source file
    is unlinked only when every table in it was moved.
    """
    global test
    tables = datalib.parse(source_file)
    not_moving = []
    moving = []
    for table in tables.values():
        if table.name in metrics_to_move:
            moving.append((table.name, metrics_to_move[table.name]))
            table.name = metrics_to_move[table.name]
        else:
            not_moving.append(table.name)
    if moving:
        if test:
            print 'moving', moving, 'from', source_file, 'to', target_file
            if len(not_moving) == 0:
                print 'unlinking', source_file
            else:
                print 'not unlinking', source_file, 'due to unmoved metrics:', not_moving
        else:
            # NOTE(review): this writes the whole `tables` dict, which still
            # includes the not_moving tables — presumably intended to move
            # only the renamed ones; confirm against callers.
            datalib.write(target_file, tables, append=True)
            if len(not_moving) == 0:
                os.unlink(source_file)
    elif test and not_moving:
        print 'not moving', not_moving, 'from', source_file, 'and not unlinking file'
def retrieve_data(timestep_dir, x_data_type, y_data_type, z_data_type, c_data_type, run_type, time, max_time, cmult, opacity, death_times):
    """Return (x_data, y_data, z_data, c_data) for one timestep directory.

    Retrieves three axis series plus an optional explicit color series,
    reconciles the agent lists so all series are parallel, randomly
    decimates the points, computes the per-point colors, and accumulates
    the global min/max/mean statistics as a side effect.
    """
    global use_hf_coloration, xlow, xhigh, ylow, yhigh, zlow, zhigh
    global xmin, xmax, ymin, ymax, zmin, zmax, xmean, ymean, zmean, xmean2, ymean2, zmean2, num_points
    x_path, x_data, x_agents = retrieve_data_type(timestep_dir, x_data_type, run_type, LimitX, y_data_type, death_times)
    y_path, y_data, y_agents = retrieve_data_type(timestep_dir, y_data_type, run_type, LimitY, x_data_type, death_times)
    z_path, z_data, z_agents = retrieve_data_type(timestep_dir, z_data_type, run_type, LimitZ, x_data_type, death_times)
    if c_data_type:
        # Optional fourth series used to modulate point colors at the end.
        c_path, c_data_vals, c_agents = retrieve_data_type(
            timestep_dir, c_data_type, run_type, 0.0, x_data_type, death_times)
    if not x_data or not y_data or not z_data:
        return None, None, None, None
    # Deal with fact that some data types don't have a datalib path or list of agents:
    # borrow a path/agent list from whichever sibling axis has one.
    if not x_path:
        if y_path:
            x_path = y_path
        else:
            x_path = z_path
    if not y_path:
        if x_path:
            y_path = x_path
        else:
            y_path = z_path
    if not z_path:
        if x_path:
            z_path = x_path
        else:
            z_path = y_path
    if not x_agents:
        if y_agents:
            x_agents = y_agents
        else:
            x_agents = z_agents
    if not y_agents:
        if x_agents:
            y_agents = x_agents
        else:
            y_agents = z_agents
    if not z_agents:
        if x_agents:
            z_agents = x_agents
        else:
            z_agents = y_agents
    # Get rid of any agents (and corresponding data points) not in all three data sets
    excluded_agents = list_difference(x_agents, y_agents)  # agents in x, but not in y
    if excluded_agents:
        for agent in excluded_agents:
            agent_idx = x_agents.index(agent)
            x_agents.pop(agent_idx)
            x_data.pop(agent_idx)
    excluded_agents = list_difference(y_agents, x_agents)  # agents in y, but not in x
    if excluded_agents:
        for agent in excluded_agents:
            agent_idx = y_agents.index(agent)
            y_agents.pop(agent_idx)
            y_data.pop(agent_idx)
    # Remember y's size so we can detect if the z-reconciliation shrinks it.
    len_y = len(y_agents)
    excluded_agents = list_difference(y_agents, z_agents)  # agents in y, but not in z
    if excluded_agents:
        for agent in excluded_agents:
            agent_idx = y_agents.index(agent)
            y_agents.pop(agent_idx)
            y_data.pop(agent_idx)
    excluded_agents = list_difference(z_agents, y_agents)  # agents in z, but not in y
    if excluded_agents:
        for agent in excluded_agents:
            agent_idx = z_agents.index(agent)
            z_agents.pop(agent_idx)
            z_data.pop(agent_idx)
    if len_y != len(y_agents):
        # y had agents eliminated due to z, so now have to remove them from x as well
        excluded_agents = list_difference(
            x_agents, y_agents)  # agents in x, but not in y
        if excluded_agents:
            for agent in excluded_agents:
                agent_idx = x_agents.index(agent)
                x_agents.pop(agent_idx)
                x_data.pop(agent_idx)
    # Make the same random selections from all data sets to properly decimate the data
    # (one shared random draw per point keeps the series aligned).
    ran_list = [random.random() for x in x_data]
    x_data = filter_data(x_data, ran_list)
    x_agents = filter_data(x_agents, ran_list)
    y_data = filter_data(y_data, ran_list)
    y_agents = filter_data(y_agents, ran_list)
    z_data = filter_data(z_data, ran_list)
    z_agents = filter_data(z_agents, ran_list)
    got_hf_data = False
    if use_hf_coloration:
        # Look for an 'hf' table in the y, then x, then z file.
        try:
            hf_table = datalib.parse(y_path)['hf']
            got_hf_data = True
        except KeyError:
            try:
                hf_table = datalib.parse(x_path)['hf']
                got_hf_data = True
            except KeyError:
                try:
                    hf_table = datalib.parse(z_path)['hf']
                    got_hf_data = True
                except KeyError:
                    pass
    if got_hf_data:
        hf_data = hf_table.getColumn('Mean').data
        c_data = map(lambda x: get_hf_color(x, opacity), hf_data)
    else:
        if use_color:
            # One identical color per point, derived from normalized time.
            c_data = map(
                lambda x: get_time_color(
                    float(time) / float(max_time), run_type, cmult, opacity),
                range(len(x_data)))
        else:
            # Grayscale fallback: constant 0.5 per point.
            c_data = map(lambda x: 0.5, range(len(x_data)))
    if c_data_type:
        # Overlay the explicit color series on the base colors.
        modify_color(x_agents, c_agents, c_data_vals, c_data, opacity)
    # Accumulate global stats (min/max plus sums for mean and variance).
    for i in range(len(x_data)):
        if x_data[i] < xmin:
            xmin = x_data[i]
        if x_data[i] > xmax:
            xmax = x_data[i]
        if y_data[i] < ymin:
            ymin = y_data[i]
        if y_data[i] > ymax:
            ymax = y_data[i]
        if z_data[i] < zmin:
            zmin = z_data[i]
        if z_data[i] > zmax:
            zmax = z_data[i]
        xmean += x_data[i]
        ymean += y_data[i]
        zmean += z_data[i]
        xmean2 += x_data[i] * x_data[i]
        ymean2 += y_data[i] * y_data[i]
        zmean2 += z_data[i] * z_data[i]
        num_points += 1
    return x_data, y_data, z_data, c_data
def retrieve_data(timestep_dir, x_data_type, y_data_type, sim_type, time, max_time, opacity):
    """Return (x_data, y_data, c_data) for one timestep directory.

    Each retrieve_data_type call receives the other axis' data type as a
    template for types with no datalib file; opacity is threaded into
    the color functions.
    """
    global use_hf_coloration
    x_path, x_data, x_agents = retrieve_data_type(timestep_dir, x_data_type, sim_type, LimitX, y_data_type)
    y_path, y_data, y_agents = retrieve_data_type(timestep_dir, y_data_type, sim_type, LimitY, x_data_type)
    if not x_data or not y_data:
        return None, None, None
    # Deal with fact that some data types don't have a datalib path or list of agents
    if not x_path:
        x_path = y_path
    elif not y_path:
        y_path = x_path
    if not x_agents:
        x_agents = y_agents
    if not y_agents:
        y_agents = x_agents
    # Get rid of any agents (and corresponding data points) not in both data sets
    excluded_agents = list_difference(x_agents, y_agents)  # agents in x, but not in y
    if excluded_agents:
        for agent in excluded_agents:
            agent_idx = x_agents.index(agent)
            x_agents.pop(agent_idx)
            x_data.pop(agent_idx)
    excluded_agents = list_difference(y_agents, x_agents)  # agents in y, but not in x
    if excluded_agents:
        for agent in excluded_agents:
            agent_idx = y_agents.index(agent)
            y_agents.pop(agent_idx)
            y_data.pop(agent_idx)
    got_hf_data = False
    if use_hf_coloration:
        # Prefer hf data from the y file, fall back to the x file.
        try:
            hf_table = datalib.parse(y_path)['hf']
            got_hf_data = True
        except KeyError:
            try:
                hf_table = datalib.parse(x_path)['hf']
                got_hf_data = True
            except KeyError:
                pass
    if got_hf_data:
        hf_data = hf_table.getColumn('Mean').data
        c_data = map(lambda x: get_hf_color(x, opacity), hf_data)
    else:
        if use_color:
            # One identical color per point, derived from normalized time.
            c_data = map(
                lambda x: get_time_color(
                    float(time) / float(max_time), sim_type, opacity),
                range(len(x_data)))
        else:
            # Grayscale fallback: constant 0.5 per point.
            c_data = map(lambda x: 0.5, range(len(x_data)))
    return x_data, y_data, c_data
def __init__(self, path_run):
    # Load the genome schema and the per-agent genome subset log,
    # keyed by agent number.
    self.schema = GenomeSchema(path_run)
    subset_path = os.path.join(path_run, 'genome/subset.log')
    self.subsets = datalib.parse(subset_path, keycolname='Agent')['GenomeSubset']
def __init__(self, path_run):
    # Parse the run's lifespans.txt into a table keyed by agent number.
    lifespans_path = os.path.join(path_run, 'lifespans.txt')
    self.table = datalib.parse(lifespans_path, keycolname='Agent')['LifeSpans']