from collections import OrderedDict


def mergeliftOver(f1, f2, annotations, outputfile, verbose="F"):
    o = open(outputfile, 'w')
    # read in file 1 and make a dictionary keyed on the read name (column 4)
    readdict = OrderedDict()
    f = open(f1, 'r')
    for l in f:
        e = l.strip().split('\t')
        readdict[e[3]] = e[:3]
    f.close()
    # read in file 2 and append the coordinates of matching reads
    f = open(f2, 'r')
    for l in f:
        e = l.strip().split('\t')
        if e[3] in readdict:
            readdict[e[3]] += e[:3]
    f.close()
    # append annotation fields for reads seen in file 1
    # (annotation rows end with the read name)
    f = open(annotations, 'r')
    for l in f:
        e = l.strip().split('\t')
        if e[-1] in readdict:
            readdict[e[-1]] += e[:-1]
    f.close()
    # write out the merged records
    for i, val in readdict.items():
        print("\t".join(val), file=o)
    o.close()
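# Usage sketch (illustrative only): the file names and the BED-style layout
# (read name in column 4, annotation rows ending with the read name) are
# assumptions made for the example.
mergeliftOver(f1="reads_original.bed",
              f2="reads_lifted.bed",
              annotations="read_annotations.tsv",
              outputfile="merged_liftover.tsv")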
def group_exceptions(error_requests, exceptions, tracebacks):
    """ Groups exceptions into a form usable by an exception.

    :param error_requests: the error requests
    :param exceptions: the exceptions
    :param tracebacks: the tracebacks
    :return: a sorted exception pile
    :rtype: dict(Exception, _Group)
    """
    data = OrderedDict()
    for error_request, exception, trace_back in zip(
            error_requests, exceptions, tracebacks):
        # Group this exception with a previously seen exception of the
        # same type if there is one; otherwise start a new group.
        for stored_exception in data.keys():
            if isinstance(exception, type(stored_exception)):
                found_exception = stored_exception
                break
        else:
            data[exception] = _Group(trace_back)
            found_exception = exception
        data[found_exception].add_coord(error_request.sdp_header)
    for exception in data:
        data[exception].finalise()
    return data.items()
def generate_tests():
    NOT_SUPPORTED = (DataModels.CIM.name.lower(),
                     SearchPlatforms.ELASTIC.name.lower())
    test_classes = OrderedDict()
    for file_name, test_content in input_files().items():
        if 'matches' in test_content or 'nonmatches' in test_content:
            platforms, models = collect_targets(test_content)
            # Explode the platform and model combinations to cover each pair
            for platform in platforms:
                for model in models:
                    if (model.value, platform.value) == NOT_SUPPORTED:
                        # skip the unsupported combination, keep the rest
                        continue
                    # Build a new TestCase class for each platform/model pair
                    class_key = (model.value.title(), platform.value.title())
                    if class_key not in test_classes:
                        new_class = types.new_class(
                            "Test{}".format(''.join(class_key)),
                            bases=(IntegrationTests, ))
                        new_class.__module__ = __name__
                        new_class.test_type = platform
                        test_classes[class_key] = new_class
                    # generate the test method
                    test_name = "test_integration_{}_{}_{}".format(
                        file_name, model.value, platform.value)
                    test = IntegrationTests.integration_test_generator(
                        model, platform, test_content)
                    # Add the generated test to the appropriate target class.
                    destination_class = test_classes.get(
                        class_key, IntegrationTests)
                    setattr(destination_class, test_name, test)
    # Expose the dynamically built classes so the test runner can discover them
    for _, dynamic_class in sorted(test_classes.items()):
        globals()[dynamic_class.__name__] = dynamic_class
class TestDataFrame(unittest.TestCase): def setUp(self): self.tmpdir = TempDir("dataframetest") self.testfilename = os.path.join(self.tmpdir.path, "dataframetest.nix") self.file = nix.File.open(self.testfilename, nix.FileMode.Overwrite) self.block = self.file.create_block("test block", "recordingsession") self.df1_dtype = OrderedDict([('name', np.int64), ('id', str), ('time', float), ('sig1', np.float64), ('sig2', np.int32)]) self.df1_data = [(1, "alpha", 20.18, 5.0, 100), (2, "beta", 20.09, 5.5, 101), (2, "gamma", 20.05, 5.1, 100), (1, "delta", 20.15, 5.3, 150), (2, "epsilon", 20.23, 5.7, 200), (2, "fi", 20.07, 5.2, 300), (1, "zeta", 20.12, 5.1, 39), (1, "eta", 20.27, 5.1, 600), (2, "theta", 20.15, 5.6, 400), (2, "iota", 20.08, 5.1, 200)] other_arr = np.arange(11101, 11200).reshape((33, 3)) other_di = OrderedDict({'name': np.int64, 'id': int, 'time': float}) self.df1 = self.block.create_data_frame("test df", "signal1", data=self.df1_data, col_dict=self.df1_dtype) self.df2 = self.block.create_data_frame("other df", "signal2", data=self.df1_data, col_dict=self.df1_dtype) self.df3 = self.block.create_data_frame("reference df", "signal3", data=other_arr, col_dict=other_di) self.dtype = self.df1._h5group.group["data"].dtype def tearDown(self): self.file.close() self.tmpdir.cleanup() def test_data_frame_eq(self): assert self.df1 == self.df1 assert not self.df1 == self.df2 assert self.df2 == self.df2 assert self.df1 is not None assert self.df2 is not None def test_create_with_list(self): arr = [(1, 'a', 20.18, 5.1, 100), (2, 'b', 20.09, 5.5, 101), (2, 'c', 20.05, 5.1, 100)] namelist = np.array(['name', 'id', 'time', 'sig1', 'sig2']) dtlist = np.array([np.int64, str, float, np.float64, np.int32]) df_li = self.block.create_data_frame("test_list", "make_of_list", data=arr, col_names=namelist, col_dtypes=dtlist) assert df_li.column_names == self.df1.column_names assert df_li.dtype == self.df1.dtype for i in df_li[:]: self.assertIsInstance(i['id'], string_types) self.assertIsInstance(i['sig2'], np.int32) def test_column_name_collision(self): arr = [(1, 'a', 20.18, 5.1, 100), (2, 'b', 20.09, 5.5, 101), (2, 'c', 20.05, 5.1, 100)] dtlist = np.array([np.int64, str, float, np.float64, np.int32]) namelist = np.array(['name', 'name', 'name', 'name', 'name']) self.assertRaises(nix.exceptions.DuplicateColumnName, self.block.create_data_frame, 'testerror', 'for_test', col_names=namelist, col_dtypes=dtlist, data=arr) def test_data_frame_type(self): assert self.df1.type == "signal1" self.df1.type = "test change" assert self.df1.type == "test change" def test_write_row(self): # test write single row row = ["1", 'abc', 3, 4.4556356242341, 5.1111111] assert list(self.df1[9]) == [2, 'iota', 20.08, 5.1, 200] self.df1.write_rows([row], [9]) assert list(self.df1[9]) == [1, 'abc', 3., 4.4556356242341, 5] self.assertIsInstance(self.df1[9]['name'], np.integer) self.assertIsInstance(self.df1[9]['sig2'], np.int32) assert self.df1[9]['sig2'] == int(5) # test write multiple rows multi_rows = [[1775, '12355', 1777, 1778, 1779], [1785, '12355', 1787, 1788, 1789]] self.df1.write_rows(multi_rows, [1, 2]) assert list(self.df1[1]) == [1775, '12355', 1777, 1778, 1779] assert list(self.df1[2]) == [1785, '12355', 1787, 1788, 1789] def test_write_column(self): # write by name column1 = np.arange(10000, 10010) self.df1.write_column(column1, name='sig1') assert list(self.df1[:]['sig1']) == list(column1) # write by index column2 = np.arange(20000, 20010) self.df1.write_column(column2, index=4) assert list(self.df1[:]['sig2']) == 
list(column2) def test_read_row(self): df1_array = np.array(self.df1_data, dtype=list(self.df1_dtype.items())) # read single row assert self.df1.read_rows(0) == df1_array[0] # read multiple multi_rows = self.df1.read_rows(np.arange(4, 9)) np.testing.assert_array_equal(multi_rows, df1_array[4:9]) multi_rows = self.df1.read_rows([3, 6]) np.testing.assert_array_equal(multi_rows, [df1_array[3], df1_array[6]]) def test_read_column(self): # read single column by index single_idx_col = self.df1.read_columns(index=[1]) data = np.array([row[1] for row in self.df1_data], dtype=nix.DataType.String) np.testing.assert_array_equal(single_idx_col, data) # read multiple columns by name multi_col = self.df1.read_columns(name=['sig1', 'sig2']) data = [(row[3], row[4]) for row in self.df1_data] assert len(multi_col) == 10 for data_row, df_row in zip(data, multi_col): assert data_row == tuple(df_row) # read columns with slices slice_cols = self.df1.read_columns(name=['sig1', 'sig2'], slc=slice(0, 6)) data = [(row[3], row[4]) for row in self.df1_data[:6]] assert len(slice_cols) == 6 for data_row, df_row in zip(data, slice_cols): assert data_row == tuple(df_row) # read single column by name single_idx_col = self.df1.read_columns(name=["sig2"]) data = np.array([100, 101, 100, 150, 200, 300, 39, 600, 400, 200], dtype=nix.DataType.Int32) np.testing.assert_array_equal(single_idx_col, data) # Read multiple columns where one is string slice_str_cols = self.df1.read_columns(name=['id', 'sig2'], slc=slice(3, 10)) data = [(row[1], row[4]) for row in self.df1_data[3:10]] assert len(slice_str_cols) == 7 for data_row, df_row in zip(data, slice_str_cols): assert data_row == tuple(df_row) def test_index_column_by_name(self): for colidx, colname in enumerate(self.df1_dtype.keys()): expdata = [row[colidx] for row in self.df1_data] assert all(self.df1[colname] == expdata) def test_read_cell(self): # read cell by position scell = self.df1.read_cell(position=[5, 3]) assert scell == 5.2 # read cell by row_idx + col_name crcell = self.df1.read_cell(col_name=['id'], row_idx=9) assert crcell == 'iota' # test error raise if only one param given self.assertRaises(ValueError, self.df1.read_cell, row_idx=10) self.assertRaises(ValueError, self.df1.read_cell, col_name='sig1') def test_write_cell(self): # write cell by position self.df1.write_cell(105, position=[8, 3]) assert self.df1[8]['sig1'] == 105 # write cell by rowid colname self.df1.write_cell('test', col_name='id', row_idx=3) assert self.df1[3]['id'] == 'test' # test error raise self.assertRaises(ValueError, self.df1.write_cell, 11, col_name='sig1') def test_append_column(self): col_data = np.arange(start=16000, stop=16010, step=1) self.df1.append_column(col_data, name='trial_col', datatype=int) assert self.df1.column_names == ('name', 'id', 'time', 'sig1', 'sig2', 'trial_col') assert len(self.df1.dtype) == 6 k = np.array(self.df1[0:10]["trial_col"], dtype=np.int64) np.testing.assert_almost_equal(k, col_data) # too short column sh_col = np.arange(start=16000, stop=16003, step=1) with self.assertRaises(ValueError): self.df1.append_column(sh_col, name='sh_col') # too long column long = np.arange(start=16000, stop=16500, step=1) with self.assertRaises(ValueError): self.df1.append_column(long, name='long') def test_append_rows(self): # append single row srow = (1, "test", 3, 4, 5) self.df1.append_rows([srow]) assert self.df1[10] == np.array(srow, dtype=list(self.df1_dtype.items())) # append multi-rows mrows = [(1, "2", 3, 4, 5), (6, "testing", 8, 9, 10)] self.df1.append_rows(mrows) 
assert all(self.df1[-2:] == np.array( mrows, dtype=list(self.df1_dtype.items()))) # append row with incorrect length errrow = [5, 6, 7, 8] self.assertRaises(ValueError, self.df1.append_rows, [errrow]) def test_unit(self): assert self.df1.units is None self.df1.units = ["s", 'A', 'ms', 'Hz', 'mA'] np.testing.assert_array_equal(self.df1.units, np.array(["s", 'A', 'ms', 'Hz', 'mA'])) assert self.df2.units is None def test_df_shape(self): assert tuple(self.df1.df_shape) == (10, 5) # create df with incorrect dimension to see if Error is raised arr = np.arange(1000).reshape(10, 10, 10) if sys.version_info[0] == 3: with self.assertRaises(ValueError): self.block.create_data_frame('err', 'err', {'name': np.int64}, data=arr) def test_data_type(self): assert self.df1.dtype[4] == np.int32 assert self.df1.dtype[0] != self.df1.dtype[4] assert self.df1.dtype[2] == self.df1.dtype[3] def test_create_without_dtypes(self): data = np.array([("a", 1, 2.2), ("b", 2, 3.3), ("c", 3, 4.4)], dtype=[('name', 'U10'), ("id", 'i4'), ('val', 'f4')]) df = self.block.create_data_frame("without_name", "test", data=data) assert sorted(list(df.column_names)) == sorted(["name", "id", "val"]) assert sorted(list(df["name"])) == ["a", "b", "c"] def test_timestamp_autoupdate(self): self.file.auto_update_timestamps = True df = self.block.create_data_frame("df.time", "test.time", col_dict=OrderedDict({"idx": int})) dftime = df.updated_at time.sleep(1) df.units = ("ly", ) self.assertNotEqual(dftime, df.updated_at) def test_timestamp_noautoupdate(self): self.file.auto_update_timestamps = False df = self.block.create_data_frame("df.time", "test.time", col_dict=OrderedDict({"idx": int})) dftime = df.updated_at time.sleep(1) df.units = ("ly", ) self.assertEqual(dftime, df.updated_at)
def create_data_frame(self, name="", type_="", col_dict=None, col_names=None, col_dtypes=None, data=None, compression=Compression.No, copy_from=None, keep_copy_id=True): """ Create/copy a new data frame for this block. Either ``col_dict`` or ``col_name`` and ``col_dtypes`` must be given. If both are given, ``col_dict`` will be used. :param name: The name of the data frame to create/copy. :type name: str :param type_: The type of the data frame. :type type_: str :param col_dict: The dictionary that specifies column names and data type in each column :type col_dict: dict or OrderedDict of {str: type} :param col_names: The collection of name of all columns in order :type col_names: tuples or list or np.array of string :param col_dtypes: The collection of data type of all columns in order :type col_dtypes: tuples or list or np.array of type :param data: Data to write after storage has been created :type data: array-like data with compound data type as specified in the columns :param compression: En-/disable dataset compression. :type compression: :class:`~nixio.Compression` :param copy_from: The DataFrame to be copied, None in normal mode :type copy_from: nixio.DataFrame :param keep_copy_id: Specify if the id should be copied in copy mode :type keep_copy_id: bool :returns: The newly created data frame. :rtype: :class:`~nixio.DataFrame` """ if copy_from: if not isinstance(copy_from, DataFrame): raise TypeError("Object to be copied is not a DataFrame") objid = self._copy_objects(copy_from, "data_frames", keep_copy_id, name) return self.data_frames[objid] util.check_entity_name_and_type(name, type_) if (isinstance(col_dict, dict) and not isinstance(col_dict, OrderedDict) and sys.version_info[0] < 3): raise TypeError("Cannot create a DataFrame from a dictionary " "in Python 2 as the order of keys is not " "preserved. 
Please use the OrderedDict class " "from the collections module instead.") if data is not None: shape = len(data) else: shape = 0 data_frames = self._h5group.open_group("data_frames") if col_dict is None: if col_names is not None: if col_dtypes is not None: col_dict = OrderedDict( (str(nam), dt) for nam, dt in zip(col_names, col_dtypes)) elif col_dtypes is None and data is not None: col_dtypes = [] for val in data[0]: col_dtypes.append(type(val)) col_dict = OrderedDict( (str(nam), dt) for nam, dt in zip(col_names, col_dtypes)) else: # col_dtypes is None and data is None raise ValueError( "The data type of each column have to be specified") if len(col_names) != len(col_dict): raise exceptions.DuplicateColumnName else: # if col_names is None if data is not None and type(data[0]) == np.void: col_dtype = data[0].dtype col_names = list(col_dtype.fields.keys()) raw_dt = col_dtype.fields.values() raw_dt = list(raw_dt) raw_dt_list = [ele[0] for ele in raw_dt] col_dict = OrderedDict(zip(col_names, raw_dt_list)) if len(col_dtype.fields.values()) != len(col_dict): raise exceptions.DuplicateColumnName else: # data is None or type(data[0]) != np.void # data_type doesnt matter raise ValueError( "No information about column names is provided!") if col_dict is not None: for nam, dt in col_dict.items(): if isclass(dt): if any(issubclass(dt, st) for st in string_types) \ or issubclass(dt, np.string_): col_dict[nam] = util.vlen_str_dtype if 'U' in str(dt) or dt == np.string_: col_dict[nam] = util.vlen_str_dtype dt_arr = list(col_dict.items()) col_dtype = np.dtype(dt_arr) df = DataFrame.create_new(self.file, self, data_frames, name, type_, shape, col_dtype, compression) if data is not None: if type(data[0]) == np.void: data = np.ascontiguousarray(data, dtype=col_dtype) df.write_direct(data) else: data = list(map(tuple, data)) arr = np.ascontiguousarray(data, dtype=col_dtype) df.write_direct(arr) return df
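# Usage sketch (illustrative, not part of the library): a minimal call to
# create_data_frame() with ``col_dict``, mirroring the test fixtures above.
# The file name, block name, and import alias ``nixio as nix`` are assumptions.
from collections import OrderedDict

import numpy as np
import nixio as nix

nixfile = nix.File.open("example.nix", nix.FileMode.Overwrite)
block = nixfile.create_block("session", "recording")
columns = OrderedDict([("name", np.int64), ("id", str), ("time", float)])
rows = [(1, "alpha", 20.18), (2, "beta", 20.09)]
df = block.create_data_frame("readings", "signal",
                             data=rows, col_dict=columns)
nixfile.close()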
def create_data_frame(self, name="", type_="", col_dict=None,
                      col_names=None, col_dtypes=None, data=None,
                      compression=Compression.No,
                      copy_from=None, keep_copy_id=True):
    """
    Create/copy a new data frame for this block. Either ``col_dict``
    or ``col_names`` and ``col_dtypes`` must be given.
    If both are given, ``col_dict`` will be used.

    :param name: The name of the data frame to create/copy.
    :type name: str
    :param type_: The type of the data frame.
    :type type_: str
    :param col_dict: The dictionary that specifies column names and
        the data type of each column
    :type col_dict: dict or OrderedDict of {str: type}
    :param col_names: The collection of names of all columns in order
    :type col_names: tuple or list or np.array of str
    :param col_dtypes: The collection of data types of all columns in order
    :type col_dtypes: tuple or list or np.array of type
    :param data: Data to write after storage has been created
    :type data: array-like data with compound data type as specified
        in the columns
    :param compression: En-/disable dataset compression.
    :type compression: :class:`~nixio.Compression`
    :param copy_from: The DataFrame to be copied, None in normal mode
    :type copy_from: DataFrame
    :param keep_copy_id: Specify if the id should be copied in copy mode
    :type keep_copy_id: bool

    :returns: The newly created data frame.
    :rtype: :class:`~nixio.DataFrame`
    """
    if copy_from:
        if not isinstance(copy_from, DataFrame):
            raise TypeError("Object to be copied is not a DataFrame")
        objid = self._copy_objects(copy_from, "data_frames",
                                   keep_copy_id, name)
        return self.data_frames[objid]

    util.check_entity_name_and_type(name, type_)
    if (isinstance(col_dict, dict)
            and not isinstance(col_dict, OrderedDict)
            and sys.version_info[0] < 3):
        raise TypeError("Python 2 users should use name_list "
                        "or OrderedDict created with LIST and TUPLES "
                        "to create DataFrames as the order "
                        "of the columns cannot be maintained in Py2")

    if data is not None:
        shape = len(data)
    else:
        shape = 0
    data_frames = self._h5group.open_group("data_frames")

    if col_dict is None:
        if col_names is not None:
            if col_dtypes is not None:
                col_dict = OrderedDict(
                    (str(nam), dt)
                    for nam, dt in zip(col_names, col_dtypes)
                )
            elif col_dtypes is None and data is not None:
                col_dtypes = []
                for x in data[0]:
                    col_dtypes.append(type(x))
                col_dict = OrderedDict(
                    (str(nam), dt)
                    for nam, dt in zip(col_names, col_dtypes)
                )
            else:  # col_dtypes is None and data is None
                raise ValueError(
                    "The data type of each column has to be specified")
        else:  # if col_names is None
            if data is not None and type(data[0]) == np.void:
                col_dtype = data[0].dtype
                for i, dt in enumerate(col_dtype.fields.values()):
                    if dt[0] == np.dtype(str):
                        cn = list(col_dtype.fields.keys())
                        raw_dt = col_dtype.fields.values()
                        raw_dt = list(raw_dt)
                        raw_dt_list = [ele[0] for ele in raw_dt]
                        col_dict = OrderedDict(zip(cn, raw_dt_list))
            else:
                # data is None or type(data[0]) != np.void
                # data_type doesn't matter
                raise ValueError(
                    "No information about column names is provided!")

    if col_dict is not None:
        for nam, dt in col_dict.items():
            if isclass(dt):
                if any(issubclass(dt, st) for st in string_types) \
                        or issubclass(dt, np.string_):
                    col_dict[nam] = util.vlen_str_dtype
        dt_arr = list(col_dict.items())
        col_dtype = np.dtype(dt_arr)

    df = DataFrame._create_new(self, data_frames, name, type_,
                               shape, col_dtype, compression)

    if data is not None:
        if type(data[0]) == np.void:
            data = np.ascontiguousarray(data, dtype=col_dtype)
            df.write_direct(data)
        else:
            data = list(map(tuple, data))
            arr = np.ascontiguousarray(data, dtype=col_dtype)
            df.write_direct(arr)
    return df
class CPUInfos(object):
    """ A set of CPU information objects.
    """
    __slots__ = [
        "_cpu_infos"]

    def __init__(self):
        self._cpu_infos = OrderedDict()

    def add_processor(self, x, y, processor_id, cpu_info):
        """ Add a processor on a given chip to the set.

        :param x: The x-coordinate of the chip
        :type x: int
        :param y: The y-coordinate of the chip
        :type y: int
        :param processor_id: A processor ID
        :type processor_id: int
        :param cpu_info: The CPU information for the core
        :type cpu_info: :py:class:`spinnman.model.enums.cpu_info.CPUInfo`
        """
        self._cpu_infos[x, y, processor_id] = cpu_info

    @property
    def cpu_infos(self):
        """ The one-per-core core info.

        :return: iterable of (x, y, p) core info
        """
        return iteritems(self._cpu_infos)

    def __iter__(self):
        return iter(self._cpu_infos)

    def iteritems(self):
        """ Get an iterable of ((x, y, p), cpu_info).
        """
        return iteritems(self._cpu_infos)

    def items(self):
        return self._cpu_infos.items()

    def values(self):
        return self._cpu_infos.values()

    def itervalues(self):
        """ Get an iterable of cpu_info.
        """
        return itervalues(self._cpu_infos)

    def keys(self):
        return self._cpu_infos.keys()

    def iterkeys(self):
        """ Get an iterable of (x, y, p).
        """
        return iterkeys(self._cpu_infos)

    def __len__(self):
        """ The total number of processors that are in these core subsets.
        """
        return len(self._cpu_infos)
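# Usage sketch (illustrative): populate a CPUInfos set and read it back.
# The cpu_info payloads below are placeholders standing in for real
# spinnman.model.enums.cpu_info.CPUInfo instances.
infos = CPUInfos()
infos.add_processor(x=0, y=0, processor_id=1, cpu_info="cpu-info-placeholder")
infos.add_processor(x=0, y=1, processor_id=2, cpu_info="cpu-info-placeholder")
for (x, y, p), info in infos.items():
    print(x, y, p, info)
print(len(infos))  # -> 2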
class DaemonWatcher(object): """ Given a Ceph daemon's admin socket path, poll its performance counters and output a series of output lines showing the momentary values of counters of interest (those with the 'nick' property in Ceph's schema) """ (BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, GRAY) = range(8) RESET_SEQ = "\033[0m" COLOR_SEQ = "\033[1;%dm" COLOR_DARK_SEQ = "\033[0;%dm" BOLD_SEQ = "\033[1m" UNDERLINE_SEQ = "\033[4m" def __init__(self, asok, statpats=None, min_prio=0): self.asok_path = asok self._colored = False self._stats = None self._schema = None self._statpats = statpats self._stats_that_fit = dict() self._min_prio = min_prio self.termsize = Termsize() def supports_color(self, ostr): """ Returns True if the running system's terminal supports color, and False otherwise. """ unsupported_platform = (sys.platform in ('win32', 'Pocket PC')) # isatty is not always implemented, #6223. is_a_tty = hasattr(ostr, 'isatty') and ostr.isatty() if unsupported_platform or not is_a_tty: return False return True def colorize(self, msg, color, dark=False): """ Decorate `msg` with escape sequences to give the requested color """ return (self.COLOR_DARK_SEQ if dark else self.COLOR_SEQ) % (30 + color) \ + msg + self.RESET_SEQ def bold(self, msg): """ Decorate `msg` with escape sequences to make it appear bold """ return self.BOLD_SEQ + msg + self.RESET_SEQ def format_dimless(self, n, width): """ Format a number without units, so as to fit into `width` characters, substituting an appropriate unit suffix. """ units = [' ', 'k', 'M', 'G', 'T', 'P', 'E', 'Z'] unit = 0 while len("%s" % (int(n) // (1000**unit))) > width - 1: if unit >= len(units) - 1: break unit += 1 if unit > 0: truncated_float = ("%f" % (n / (1000.0**unit)))[0:width - 1] if truncated_float[-1] == '.': truncated_float = " " + truncated_float[0:-1] else: truncated_float = "%{wid}d".format(wid=width - 1) % n formatted = "%s%s" % (truncated_float, units[unit]) if self._colored: if n == 0: color = self.BLACK, False else: color = self.YELLOW, False return self.bold(self.colorize(formatted[0:-1], color[0], color[1])) \ + self.bold(self.colorize(formatted[-1], self.YELLOW, False)) else: return formatted def col_width(self, nick): """ Given the short name `nick` for a column, how many characters of width should the column be allocated? Does not include spacing between columns. """ return max(len(nick), 4) def get_stats_that_fit(self): ''' Get a possibly-truncated list of stats to display based on current terminal width. Allow breaking mid-section. 
''' current_fit = OrderedDict() if self.termsize.changed or not self._stats_that_fit: width = 0 for section_name, names in self._stats.items(): for name, stat_data in names.items(): width += self.col_width(stat_data) + 1 if width > self.termsize.cols: break if section_name not in current_fit: current_fit[section_name] = OrderedDict() current_fit[section_name][name] = stat_data if width > self.termsize.cols: break self.termsize.reset_changed() changed = current_fit and (current_fit != self._stats_that_fit) if changed: self._stats_that_fit = current_fit return self._stats_that_fit, changed def _print_headers(self, ostr): """ Print a header row to `ostr` """ header = "" stats, _ = self.get_stats_that_fit() for section_name, names in stats.items(): section_width = \ sum([self.col_width(x) + 1 for x in names.values()]) - 1 pad = max(section_width - len(section_name), 0) pad_prefix = pad // 2 header += (pad_prefix * '-') header += (section_name[0:section_width]) header += ((pad - pad_prefix) * '-') header += ' ' header += "\n" ostr.write(self.colorize(header, self.BLUE, True)) sub_header = "" for section_name, names in stats.items(): for stat_name, stat_nick in names.items(): sub_header += self.UNDERLINE_SEQ \ + self.colorize( stat_nick.ljust(self.col_width(stat_nick)), self.BLUE) \ + ' ' sub_header = sub_header[0:-1] + self.colorize('|', self.BLUE) sub_header += "\n" ostr.write(sub_header) def _print_vals(self, ostr, dump, last_dump): """ Print a single row of values to `ostr`, based on deltas between `dump` and `last_dump`. """ val_row = "" fit, changed = self.get_stats_that_fit() if changed: self._print_headers(ostr) for section_name, names in fit.items(): for stat_name, stat_nick in names.items(): stat_type = self._schema[section_name][stat_name]['type'] if bool(stat_type & COUNTER): n = max( dump[section_name][stat_name] - last_dump[section_name][stat_name], 0) elif bool(stat_type & LONG_RUNNING_AVG): entries = dump[section_name][stat_name]['avgcount'] - \ last_dump[section_name][stat_name]['avgcount'] if entries: n = (dump[section_name][stat_name]['sum'] - last_dump[section_name][stat_name]['sum']) \ / float(entries) n *= 1000.0 # Present in milliseconds else: n = 0 else: n = dump[section_name][stat_name] val_row += self.format_dimless(n, self.col_width(stat_nick)) val_row += " " val_row = val_row[0:-1] val_row += self.colorize("|", self.BLUE) val_row = val_row[0:-len(self.colorize("|", self.BLUE))] ostr.write("{0}\n".format(val_row)) def _should_include(self, sect, name, prio): ''' boolean: should we output this stat? 1) If self._statpats exists and the name filename-glob-matches anything in the list, and prio is high enough, or 2) If self._statpats doesn't exist and prio is high enough then yes. ''' if self._statpats: sectname = '.'.join((sect, name)) if not any([ p for p in self._statpats if fnmatch(name, p) or fnmatch(sectname, p) ]): return False if self._min_prio is not None and prio is not None: return (prio >= self._min_prio) return True def _load_schema(self): """ Populate our instance-local copy of the daemon's performance counter schema, and work out which stats we will display. 
""" self._schema = json.loads(admin_socket( self.asok_path, ["perf", "schema"]).decode('utf-8'), object_pairs_hook=OrderedDict) # Build list of which stats we will display self._stats = OrderedDict() for section_name, section_stats in self._schema.items(): for name, schema_data in section_stats.items(): prio = schema_data.get('priority', 0) if self._should_include(section_name, name, prio): if section_name not in self._stats: self._stats[section_name] = OrderedDict() self._stats[section_name][name] = schema_data['nick'] if not len(self._stats): raise RuntimeError("no stats selected by filters") def _handle_sigwinch(self, signo, frame): self.termsize.update() def run(self, interval, count=None, ostr=sys.stdout): """ Print output at regular intervals until interrupted. :param ostr: Stream to which to send output """ self._load_schema() self._colored = self.supports_color(ostr) self._print_headers(ostr) last_dump = json.loads( admin_socket(self.asok_path, ["perf", "dump"]).decode('utf-8')) rows_since_header = 0 try: signal(SIGWINCH, self._handle_sigwinch) while True: dump = json.loads( admin_socket(self.asok_path, ["perf", "dump"]).decode('utf-8')) if rows_since_header >= self.termsize.rows - 2: self._print_headers(ostr) rows_since_header = 0 self._print_vals(ostr, dump, last_dump) if count is not None: count -= 1 if count <= 0: break rows_since_header += 1 last_dump = dump # time.sleep() is interrupted by SIGWINCH; avoid that end = time.time() + interval while time.time() < end: time.sleep(end - time.time()) except KeyboardInterrupt: return def list(self, ostr=sys.stdout): """ Show all selected stats with section, full name, nick, and prio """ table = PrettyTable(('section', 'name', 'nick', 'prio')) table.align['section'] = 'l' table.align['name'] = 'l' table.align['nick'] = 'l' table.align['prio'] = 'r' self._load_schema() for section_name, section_stats in self._stats.items(): for name, nick in section_stats.items(): prio = self._schema[section_name][name].get('priority') or 0 table.add_row((section_name, name, nick, prio)) ostr.write(table.get_string(hrules=HEADER) + '\n')
class NeuronRecorder(object): __slots__ = ["__indexes", "__n_neurons", "__sampling_rates"] N_BYTES_FOR_TIMESTAMP = 4 N_BYTES_PER_VALUE = 4 N_BYTES_PER_RATE = 4 # uint32 N_BYTES_PER_INDEX = 1 # currently uint8 N_BYTES_PER_SIZE = 4 N_CPU_CYCLES_PER_NEURON = 8 N_BYTES_PER_WORD = 4 N_BYTES_PER_POINTER = 4 SARK_BLOCK_SIZE = 8 # Seen in sark.c MAX_RATE = 2**32 - 1 # To allow a unit32_t to be used to store the rate def __init__(self, allowed_variables, n_neurons): self.__sampling_rates = OrderedDict() self.__indexes = dict() self.__n_neurons = n_neurons for variable in allowed_variables: self.__sampling_rates[variable] = 0 self.__indexes[variable] = None def _count_recording_per_slice(self, variable, vertex_slice): if self.__sampling_rates[variable] == 0: return 0 if self.__indexes[variable] is None: return vertex_slice.n_atoms return sum(vertex_slice.lo_atom <= index <= vertex_slice.hi_atom for index in self.__indexes[variable]) def _neurons_recording(self, variable, vertex_slice): if self.__sampling_rates[variable] == 0: return [] if self.__indexes[variable] is None: return range(vertex_slice.lo_atom, vertex_slice.hi_atom + 1) recording = [] indexes = self.__indexes[variable] for index in xrange(vertex_slice.lo_atom, vertex_slice.hi_atom + 1): if index in indexes: recording.append(index) return recording def get_neuron_sampling_interval(self, variable): """ Return the current sampling interval for this variable :param variable: PyNN name of the variable :return: Sampling interval in micro seconds """ step = globals_variables.get_simulator().machine_time_step / 1000 return self.__sampling_rates[variable] * step def get_matrix_data(self, label, buffer_manager, region, placements, graph_mapper, application_vertex, variable, n_machine_time_steps): """ Read a uint32 mapped to time and neuron IDs from the SpiNNaker\ machine. :param label: vertex label :param buffer_manager: the manager for buffered data :param region: the DSG region ID used for this data :param placements: the placements object :param graph_mapper: \ the mapping between application and machine vertices :param application_vertex: :param variable: PyNN name for the variable (V, gsy_inh etc.) 
:type variable: str :param n_machine_time_steps: :return: """ if variable == SPIKES: msg = "Variable {} is not supported use get_spikes".format(SPIKES) raise ConfigurationException(msg) vertices = graph_mapper.get_machine_vertices(application_vertex) progress = ProgressBar(vertices, "Getting {} for {}".format(variable, label)) sampling_rate = self.__sampling_rates[variable] expected_rows = int(math.ceil(n_machine_time_steps / sampling_rate)) missing_str = "" data = None indexes = [] for vertex in progress.over(vertices): placement = placements.get_placement_of_vertex(vertex) vertex_slice = graph_mapper.get_slice(vertex) neurons = self._neurons_recording(variable, vertex_slice) n_neurons = len(neurons) if n_neurons == 0: continue indexes.extend(neurons) # for buffering output info is taken form the buffer manager record_raw, missing_data = buffer_manager.get_data_by_placement( placement, region) record_length = len(record_raw) row_length = self.N_BYTES_FOR_TIMESTAMP + \ n_neurons * self.N_BYTES_PER_VALUE # There is one column for time and one for each neuron recording n_rows = record_length // row_length if record_length > 0: # Converts bytes to ints and make a matrix record = (numpy.asarray( record_raw, dtype="uint8").view(dtype="<i4")).reshape( (n_rows, (n_neurons + 1))) else: record = numpy.empty((0, n_neurons)) # Check if you have the expected data if not missing_data and n_rows == expected_rows: # Just cut the timestamps off to get the fragment fragment = (record[:, 1:] / float(DataType.S1615.scale)) else: missing_str += "({}, {}, {}); ".format(placement.x, placement.y, placement.p) # Start the fragment for this slice empty fragment = numpy.empty((expected_rows, n_neurons)) for i in xrange(0, expected_rows): time = i * sampling_rate # Check if there is data for this timestep local_indexes = numpy.where(record[:, 0] == time) if len(local_indexes[0]) == 1: fragment[i] = (record[local_indexes[0], 1:] / float(DataType.S1615.scale)) elif len(local_indexes[0]) > 1: logger.warning( "Population {} on multiple recorded data for " "time {}".format(label, time)) else: # Set row to nan fragment[i] = numpy.full(n_neurons, numpy.nan) if data is None: data = fragment else: # Add the slice fragment on axis 1 which is IDs/channel_index data = numpy.append(data, fragment, axis=1) if len(missing_str) > 0: logger.warning( "Population {} is missing recorded data in region {} from the" " following cores: {}".format(label, region, missing_str)) sampling_interval = self.get_neuron_sampling_interval(variable) return (data, indexes, sampling_interval) def get_spikes(self, label, buffer_manager, region, placements, graph_mapper, application_vertex, machine_time_step): spike_times = list() spike_ids = list() ms_per_tick = machine_time_step / 1000.0 vertices = graph_mapper.get_machine_vertices(application_vertex) missing_str = "" progress = ProgressBar(vertices, "Getting spikes for {}".format(label)) for vertex in progress.over(vertices): placement = placements.get_placement_of_vertex(vertex) vertex_slice = graph_mapper.get_slice(vertex) if self.__indexes[SPIKES] is None: neurons_recording = vertex_slice.n_atoms else: neurons_recording = sum((index >= vertex_slice.lo_atom and index <= vertex_slice.hi_atom) for index in self.__indexes[SPIKES]) if neurons_recording == 0: continue # Read the spikes n_words = int(math.ceil(neurons_recording / 32.0)) n_bytes = n_words * self.N_BYTES_PER_WORD n_words_with_timestamp = n_words + 1 # for buffering output info is taken form the buffer manager record_raw, data_missing = 
buffer_manager.get_data_by_placement( placement, region) if data_missing: missing_str += "({}, {}, {}); ".format(placement.x, placement.y, placement.p) if len(record_raw) > 0: raw_data = (numpy.asarray(record_raw, dtype="uint8").view( dtype="<i4")).reshape([-1, n_words_with_timestamp]) else: raw_data = record_raw if len(raw_data) > 0: record_time = raw_data[:, 0] * float(ms_per_tick) spikes = raw_data[:, 1:].byteswap().view("uint8") bits = numpy.fliplr( numpy.unpackbits(spikes).reshape((-1, 32))).reshape( (-1, n_bytes * 8)) time_indices, local_indices = numpy.where(bits == 1) if self.__indexes[SPIKES] is None: indices = local_indices + vertex_slice.lo_atom times = record_time[time_indices].reshape((-1)) spike_ids.extend(indices) spike_times.extend(times) else: neurons = self._neurons_recording(SPIKES, vertex_slice) n_neurons = len(neurons) for time_indice, local in zip(time_indices, local_indices): if local < n_neurons: spike_ids.append(neurons[local]) spike_times.append(record_time[time_indice]) if len(missing_str) > 0: logger.warning( "Population {} is missing spike data in region {} from the" " following cores: {}".format(label, region, missing_str)) if len(spike_ids) == 0: return numpy.zeros((0, 2), dtype="float") result = numpy.column_stack((spike_ids, spike_times)) return result[numpy.lexsort((spike_times, spike_ids))] def get_recordable_variables(self): return self.__sampling_rates.keys() def is_recording(self, variable): try: return self.__sampling_rates[variable] > 0 except KeyError as e: msg = "Variable {} is not supported. Supported variables are {}" \ "".format(variable, self.get_recordable_variables()) raise_from(ConfigurationException(msg), e) @property def recording_variables(self): results = list() for region, rate in self.__sampling_rates.items(): if rate > 0: results.append(region) return results @property def recorded_region_ids(self): results = list() for id, rate in enumerate(self.__sampling_rates.values()): if rate > 0: results.append(id) return results def _compute_rate(self, sampling_interval): """ Convert a sampling interval into a rate. \ Remember, machine time step is in nanoseconds :param sampling_interval: interval between samples in microseconds :return: rate """ if sampling_interval is None: return 1 step = globals_variables.get_simulator().machine_time_step / 1000 rate = int(sampling_interval / step) if sampling_interval != rate * step: msg = "sampling_interval {} is not an an integer multiple of the "\ "simulation timestep {}".format(sampling_interval, step) raise ConfigurationException(msg) if rate > self.MAX_RATE: msg = "sampling_interval {} higher than max allowed which is {}" \ "".format(sampling_interval, step * self.MAX_RATE) raise ConfigurationException(msg) return rate def check_indexes(self, indexes): if indexes is None: return if len(indexes) == 0: raise ConfigurationException("Empty indexes list") found = False warning = None for index in indexes: if index < 0: raise ConfigurationException( "Negative indexes are not supported") elif index >= self.__n_neurons: warning = "Ignoring indexes greater than population size." 
else: found = True if warning is not None: logger.warning(warning) if not found: raise ConfigurationException( "All indexes larger than population size") def _turn_off_recording(self, variable, sampling_interval, remove_indexes): if self.__sampling_rates[variable] == 0: # Already off so ignore other parameters return if remove_indexes is None: # turning all off so ignoring sampling interval self.__sampling_rates[variable] = 0 self.__indexes[variable] = None return # No good reason to specify_interval when turning off if sampling_interval is not None: rate = self._compute_rate(sampling_interval) # But if they do make sure it is the same as before if rate != self.__sampling_rates[variable]: raise ConfigurationException( "Illegal sampling_interval parameter while turning " "off recording") if self.__indexes[variable] is None: # start with all indexes self.__indexes[variable] = range(self.__n_neurons) # remove the indexes not recording self.__indexes[variable] = \ [index for index in self.__indexes[variable] if index not in remove_indexes] # Check is at least one index still recording if len(self.__indexes[variable]) == 0: self.__sampling_rates[variable] = 0 self.__indexes[variable] = None def _check_complete_overwrite(self, variable, indexes): if indexes is None: # overwriting all OK! return if self.__indexes[variable] is None: if set(set(range(self.__n_neurons))).issubset(set(indexes)): # overwriting all previous so OK! return else: if set(self.__indexes[variable]).issubset(set(indexes)): # overwriting all previous so OK! return raise ConfigurationException( "Current implementation does not support multiple " "sampling_intervals for {} on one population.".format(variable)) def _turn_on_recording(self, variable, sampling_interval, indexes): rate = self._compute_rate(sampling_interval) if self.__sampling_rates[variable] == 0: # Previously not recording so OK self.__sampling_rates[variable] = rate elif rate != self.__sampling_rates[variable]: self._check_complete_overwrite(variable, indexes) # else rate not changed so no action if indexes is None: # previous recording indexes does not matter as now all (None) self.__indexes[variable] = None else: # make sure indexes is not a generator like range indexes = list(indexes) self.check_indexes(indexes) if self.__indexes[variable] is not None: # merge the two indexes indexes = self.__indexes[variable] + indexes # Avoid duplicates and keep in numerical order self.__indexes[variable] = list(set(indexes)) self.__indexes[variable].sort() def set_recording(self, variable, new_state, sampling_interval=None, indexes=None): if variable == "all": for key in self.__sampling_rates.keys(): self.set_recording(key, new_state, sampling_interval, indexes) elif variable in self.__sampling_rates: if new_state: self._turn_on_recording(variable, sampling_interval, indexes) else: self._turn_off_recording(variable, sampling_interval, indexes) else: raise ConfigurationException( "Variable {} is not supported".format(variable)) def get_buffered_sdram_per_record(self, variable, vertex_slice): """ Return the SDRAM used per record :param variable: :param vertex_slice: :return: """ n_neurons = self._count_recording_per_slice(variable, vertex_slice) if n_neurons == 0: return 0 if variable == SPIKES: # Overflow can be ignored as it is not save if in an extra word out_spike_words = int(math.ceil(n_neurons / 32.0)) out_spike_bytes = out_spike_words * self.N_BYTES_PER_WORD return self.N_BYTES_FOR_TIMESTAMP + out_spike_bytes else: return self.N_BYTES_FOR_TIMESTAMP + \ n_neurons * 
self.N_BYTES_PER_VALUE def get_buffered_sdram_per_timestep(self, variable, vertex_slice): """ Return the SDRAM used per timestep. In the case where sampling is used it returns the average\ for recording and none recording based on the recording rate :param variable: :param vertex_slice: :return: """ rate = self.__sampling_rates[variable] if rate == 0: return 0 data_size = self.get_buffered_sdram_per_record(variable, vertex_slice) if rate == 1: return data_size else: return data_size // rate def get_sampling_overflow_sdram(self, vertex_slice): """ Get the extra SDRAM that should be reserved if using per_timestep This is the extra that must be reserved if per_timestep is an average\ rather than fixed for every timestep. When sampling the average * time_steps may not be quite enough.\ This returns the extra space in the worst case\ where time_steps is a multiple of sampling rate + 1,\ and recording is done in the first and last time_step :param vertex_slice: :return: Highest possible overflow needed """ overflow = 0 for variable, rate in iteritems(self.__sampling_rates): # If rate is 0 no recording so no overflow # If rate is 1 there is no overflow as average is exact if rate > 1: data_size = self.get_buffered_sdram_per_record( variable, vertex_slice) overflow += data_size // rate * (rate - 1) return overflow def get_buffered_sdram(self, variable, vertex_slice, n_machine_time_steps): """ Returns the SDRAM used for this may timesteps If required the total is rounded up so the space will always fit :param variable: The :param vertex_slice: :return: """ rate = self.__sampling_rates[variable] if rate == 0: return 0 data_size = self.get_buffered_sdram_per_record(variable, vertex_slice) records = n_machine_time_steps // rate if n_machine_time_steps % rate > 0: records = records + 1 return data_size * records def get_sdram_usage_in_bytes(self, vertex_slice): n_words_for_n_neurons = (vertex_slice.n_atoms + 3) // 4 n_bytes_for_n_neurons = n_words_for_n_neurons * 4 return (8 + n_bytes_for_n_neurons) * len(self.__sampling_rates) def _get_fixed_sdram_usage(self, vertex_slice): total_neurons = vertex_slice.hi_atom - vertex_slice.lo_atom + 1 fixed_sdram = 0 # Recording rate for each neuron fixed_sdram += self.N_BYTES_PER_RATE # Number of recording neurons fixed_sdram += self.N_BYTES_PER_INDEX # index_parameters one per neuron # even if not recording as also act as a gate fixed_sdram += self.N_BYTES_PER_INDEX * total_neurons return fixed_sdram def get_variable_sdram_usage(self, vertex_slice): fixed_sdram = 0 per_timestep_sdram = 0 for variable in self.__sampling_rates: rate = self.__sampling_rates[variable] fixed_sdram += self._get_fixed_sdram_usage(vertex_slice) if rate > 0: fixed_sdram += self.SARK_BLOCK_SIZE per_record = self.get_buffered_sdram_per_record( variable, vertex_slice) if rate == 1: # Add size for one record as recording every timestep per_timestep_sdram += per_record else: # Get the average cost per timestep average_per_timestep = per_record / rate per_timestep_sdram += average_per_timestep # Add the rest once to fixed for worst case fixed_sdram += (per_record - average_per_timestep) return VariableSDRAM(fixed_sdram, per_timestep_sdram) def get_dtcm_usage_in_bytes(self, vertex_slice): # *_rate + n_neurons_recording_* + *_indexes usage = self.get_sdram_usage_in_bytes(vertex_slice) # *_count + *_increment usage += len(self.__sampling_rates) * self.N_BYTES_PER_POINTER * 2 # out_spikes, *_values for variable in self.__sampling_rates: if variable == SPIKES: out_spike_words = 
int(math.ceil(vertex_slice.n_atoms / 32.0)) out_spike_bytes = out_spike_words * self.N_BYTES_PER_WORD usage += self.N_BYTES_FOR_TIMESTAMP + out_spike_bytes else: usage += (self.N_BYTES_FOR_TIMESTAMP + vertex_slice.n_atoms * self.N_BYTES_PER_VALUE) # *_size usage += len(self.__sampling_rates) * self.N_BYTES_PER_SIZE # n_recordings_outstanding usage += self.N_BYTES_PER_WORD * 4 return usage def get_n_cpu_cycles(self, n_neurons): return n_neurons * self.N_CPU_CYCLES_PER_NEURON * \ len(self.recording_variables) def get_data(self, vertex_slice): data = list() n_words_for_n_neurons = (vertex_slice.n_atoms + 3) // 4 n_bytes_for_n_neurons = n_words_for_n_neurons * 4 for variable in self.__sampling_rates: rate = self.__sampling_rates[variable] n_recording = self._count_recording_per_slice( variable, vertex_slice) data.append(numpy.array([rate, n_recording], dtype="uint32")) if rate == 0: data.append(numpy.zeros(n_words_for_n_neurons, dtype="uint32")) elif self.__indexes[variable] is None: data.append( numpy.arange(n_bytes_for_n_neurons, dtype="uint8").view("uint32")) else: indexes = self.__indexes[variable] local_index = 0 local_indexes = list() for index in xrange(n_bytes_for_n_neurons): if index + vertex_slice.lo_atom in indexes: local_indexes.append(local_index) local_index += 1 else: # write to one beyond recording range local_indexes.append(n_recording) data.append( numpy.array(local_indexes, dtype="uint8").view("uint32")) return numpy.concatenate(data) def get_global_parameters(self, vertex_slice): params = [] for variable in self.__sampling_rates: params.append( NeuronParameter(self.__sampling_rates[variable], DataType.UINT32)) for variable in self.__sampling_rates: n_recording = self._count_recording_per_slice( variable, vertex_slice) params.append(NeuronParameter(n_recording, DataType.UINT8)) return params def get_index_parameters(self, vertex_slice): params = [] for variable in self.__sampling_rates: if self.__sampling_rates[variable] <= 0: local_indexes = 0 elif self.__indexes[variable] is None: local_indexes = IndexIsValue() else: local_indexes = [] n_recording = sum( vertex_slice.lo_atom <= index <= vertex_slice.hi_atom for index in self.__indexes[variable]) indexes = self.__indexes[variable] local_index = 0 for index in xrange(vertex_slice.lo_atom, vertex_slice.hi_atom + 1): if index in indexes: local_indexes.append(local_index) local_index += 1 else: # write to one beyond recording range local_indexes.append(n_recording) params.append(NeuronParameter(local_indexes, DataType.UINT8)) return params @property def _indexes(self): # for testing only return _ReadOnlyDict(self.__indexes)
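# Usage sketch (illustrative): the variable names and population size are
# assumptions. Turning recording on without a sampling_interval records at
# every timestep (rate 1) and does not consult the simulator configuration.
recorder = NeuronRecorder(["v", "gsyn_exc", "spikes"], n_neurons=100)
recorder.set_recording("v", True)
recorder.set_recording("spikes", True, indexes=range(0, 50))
print(recorder.recording_variables)       # ['v', 'spikes']
print(recorder.is_recording("gsyn_exc"))  # False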
class TorConfig(object): """This class abstracts out Tor's config, and can be used both to create torrc files from nothing and track live configuration of a Tor instance. Also, it gives easy access to all the configuration options present. This is initialized at "bootstrap" time, providing attribute-based access thereafter. Note that after you set some number of items, you need to do a save() before these are sent to Tor (and then they will be done as one SETCONF). You may also use this class to construct a configuration from scratch (e.g. to give to :func:`txtorcon.launch_tor`). In this case, values are reflected right away. (If we're not bootstrapped to a Tor, this is the mode). Note that you do not need to call save() if you're just using TorConfig to create a .torrc file or for input to launch_tor(). This class also listens for CONF_CHANGED events to update the cached data in the event other controllers (etc) changed it. There is a lot of magic attribute stuff going on in here (which might be a bad idea, overall) but the *intent* is that you can just set Tor options and it will all Just Work. For config items that take multiple values, set that to a list. For example:: conf = TorConfig(...) conf.SOCKSPort = [9050, 1337] conf.HiddenServices.append(HiddenService(...)) (Incoming objects, like lists, are intercepted and wrapped). FIXME: when is CONF_CHANGED introduced in Tor? Can we do anything like it for prior versions? FIXME: - HiddenServiceOptions is special: GETCONF on it returns several (well, two) values. Besides adding the two keys 'by hand' do we need to do anything special? Can't we just depend on users doing 'conf.hiddenservicedir = foo' AND 'conf.hiddenserviceport = bar' before a save() ? - once I determine a value is default, is there any way to actually get what this value is? """ @staticmethod @defer.inlineCallbacks def from_protocol(proto): """ This creates and returns a ready-to-go TorConfig instance from the given protocol, which should be an instance of TorControlProtocol. """ cfg = TorConfig(control=proto) yield cfg.post_bootstrap defer.returnValue(cfg) def __init__(self, control=None): self.config = {} '''Current configuration, by keys.''' if control is None: self._protocol = None self.__dict__['_accept_all_'] = None else: self._protocol = ITorControlProtocol(control) self.unsaved = OrderedDict() '''Configuration that has been changed since last save().''' self.parsers = {} '''Instances of the parser classes, subclasses of TorConfigType''' self.list_parsers = set(['hiddenservices', 'ephemeralonionservices']) '''All the names (keys from .parsers) that are a List of something.''' # during bootstrapping we decide whether we support the # following features. A thing goes in here if TorConfig # behaves differently depending upon whether it shows up in # "GETINFO config/names" self._supports = dict(HiddenServiceDirGroupReadable=False) self._defaults = dict() self.post_bootstrap = defer.Deferred() if self.protocol: if self.protocol.post_bootstrap: self.protocol.post_bootstrap.addCallback( self.bootstrap).addErrback(self.post_bootstrap.errback) else: self.bootstrap() else: self.do_post_bootstrap(self) self.__dict__['_setup_'] = None def socks_endpoint(self, reactor, port=None): """ Returns a TorSocksEndpoint configured to use an already-configured SOCKSPort from the Tor we're connected to. By default, this will be the very first SOCKSPort. :param port: a str, the first part of the SOCKSPort line (that is, a port like "9151" or a Unix socket config like "unix:/path". 
You may also specify a port as an int. If you need to use a particular port that may or may not already be configured, see the async method :meth:`txtorcon.TorConfig.create_socks_endpoint` """ if len(self.SocksPort) == 0: raise RuntimeError("No SOCKS ports configured") socks_config = None if port is None: socks_config = self.SocksPort[0] else: port = str(port) # in case e.g. an int passed in if ' ' in port: raise ValueError( "Can't specify options; use create_socks_endpoint instead") for idx, port_config in enumerate(self.SocksPort): # "SOCKSPort" is a gnarly beast that can have a bunch # of options appended, so we have to split off the # first thing which *should* be the port (or can be a # string like 'unix:') if port_config.split()[0] == port: socks_config = port_config break if socks_config is None: raise RuntimeError( "No SOCKSPort configured for port {}".format(port)) return _endpoint_from_socksport_line(reactor, socks_config) @defer.inlineCallbacks def create_socks_endpoint(self, reactor, socks_config): """ Creates a new TorSocksEndpoint instance given a valid configuration line for ``SocksPort``; if this configuration isn't already in the underlying tor, we add it. Note that this method may call :meth:`txtorcon.TorConfig.save()` on this instance. Note that calling this with `socks_config=None` is equivalent to calling `.socks_endpoint` (which is not async). XXX socks_config should be .. i dunno, but there's f*****g options and craziness, e.g. default Tor Browser Bundle is: ['9150 IPv6Traffic PreferIPv6 KeepAliveIsolateSOCKSAuth', '9155'] XXX maybe we should say "socks_port" as the 3rd arg, insist it's an int, and then allow/support all the other options (e.g. via kwargs) XXX we could avoid the "maybe call .save()" thing; worth it? (actually, no we can't or the Tor won't have it config'd) """ yield self.post_bootstrap if socks_config is None: if len(self.SocksPort) == 0: raise RuntimeError( "socks_port is None and Tor has no SocksPorts configured") socks_config = self.SocksPort[0] else: if not any([socks_config in port for port in self.SocksPort]): # need to configure Tor self.SocksPort.append(socks_config) try: yield self.save() except TorProtocolError as e: extra = '' if socks_config.startswith('unix:'): # XXX so why don't we check this for the # caller, earlier on? extra = '\nNote Tor has specific ownership/permissions ' +\ 'requirements for unix sockets and parent dir.' raise RuntimeError( "While configuring SOCKSPort to '{}', error from" " Tor: {}{}".format(socks_config, e, extra)) defer.returnValue(_endpoint_from_socksport_line(reactor, socks_config)) # FIXME should re-name this to "tor_protocol" to be consistent # with other things? Or rename the other things? """ read-only access to TorControlProtocol. Call attach_protocol() to set it, which can only be done if we don't already have a protocol. """ def _get_protocol(self): return self.__dict__['_protocol'] protocol = property(_get_protocol) tor_protocol = property(_get_protocol) def attach_protocol(self, proto): """ returns a Deferred that fires once we've set this object up to track the protocol. Fails if we already have a protocol. 
""" if self._protocol is not None: raise RuntimeError("Already have a protocol.") # make sure we have nothing in self.unsaved self.save() self.__dict__['_protocol'] = proto # FIXME some of this is duplicated from ctor del self.__dict__['_accept_all_'] self.__dict__['post_bootstrap'] = defer.Deferred() if proto.post_bootstrap: proto.post_bootstrap.addCallback(self.bootstrap) return self.__dict__['post_bootstrap'] def __setattr__(self, name, value): """ we override this so that we can provide direct attribute access to our config items, and move them into self.unsaved when they've been changed. hiddenservices have to be special unfortunately. the _setup_ thing is so that we can set up the attributes we need in the constructor without uusing __dict__ all over the place. """ # appease flake8's hatred of lambda :/ def has_setup_attr(o): return '_setup_' in o.__dict__ def has_accept_all_attr(o): return '_accept_all_' in o.__dict__ def is_hidden_services(s): return s.lower() == "hiddenservices" if has_setup_attr(self): name = self._find_real_name(name) if not has_accept_all_attr(self) and not is_hidden_services(name): value = self.parsers[name].validate(value, self, name) if isinstance(value, list): value = _ListWrapper( value, functools.partial(self.mark_unsaved, name)) name = self._find_real_name(name) self.unsaved[name] = value else: super(TorConfig, self).__setattr__(name, value) def _maybe_create_listwrapper(self, rn): if rn.lower() in self.list_parsers and rn not in self.config: self.config[rn] = _ListWrapper([], functools.partial( self.mark_unsaved, rn)) def __getattr__(self, name): """ on purpose, we don't return self.unsaved if the key is in there because I want the config to represent the running Tor not ``things which might get into the running Tor if save() were to be called'' """ rn = self._find_real_name(name) if '_accept_all_' in self.__dict__ and rn in self.unsaved: return self.unsaved[rn] self._maybe_create_listwrapper(rn) v = self.config[rn] if v == DEFAULT_VALUE: v = self.__dict__['_defaults'].get(rn, DEFAULT_VALUE) return v def __contains__(self, item): if item in self.unsaved and '_accept_all_' in self.__dict__: return True return item in self.config def __iter__(self): ''' FIXME needs proper iterator tests in test_torconfig too ''' for x in self.config.__iter__(): yield x for x in self.__dict__['unsaved'].__iter__(): yield x def get_type(self, name): """ return the type of a config key. :param: name the key FIXME can we do something more-clever than this for client code to determine what sort of thing a key is? """ # XXX FIXME uhm...how to do all the different types of hidden-services? if name.lower() == 'hiddenservices': return FilesystemOnionService return type(self.parsers[name]) def _conf_changed(self, arg): """ internal callback. from control-spec: 4.1.18. Configuration changed The syntax is: StartReplyLine *(MidReplyLine) EndReplyLine StartReplyLine = "650-CONF_CHANGED" CRLF MidReplyLine = "650-" KEYWORD ["=" VALUE] CRLF EndReplyLine = "650 OK" Tor configuration options have changed (such as via a SETCONF or RELOAD signal). KEYWORD and VALUE specify the configuration option that was changed. Undefined configuration options contain only the KEYWORD. 
""" conf = parse_keywords(arg, multiline_values=False) for (k, v) in conf.items(): # v will be txtorcon.DEFAULT_VALUE already from # parse_keywords if it was unspecified real_name = self._find_real_name(k) if real_name in self.parsers: v = self.parsers[real_name].parse(v) self.config[real_name] = v def bootstrap(self, arg=None): ''' This only takes args so it can be used as a callback. Don't pass an arg, it is ignored. ''' try: d = self.protocol.add_event_listener('CONF_CHANGED', self._conf_changed) except RuntimeError: # for Tor versions which don't understand CONF_CHANGED # there's nothing we can really do. log.msg( "Can't listen for CONF_CHANGED event; won't stay up-to-date " "with other clients.") d = defer.succeed(None) d.addCallback(lambda _: self.protocol.get_info_raw("config/names")) d.addCallback(self._do_setup) d.addCallback(self.do_post_bootstrap) d.addErrback(self.do_post_errback) def do_post_errback(self, f): self.post_bootstrap.errback(f) return None def do_post_bootstrap(self, arg): if not self.post_bootstrap.called: self.post_bootstrap.callback(self) return self def needs_save(self): return len(self.unsaved) > 0 def mark_unsaved(self, name): name = self._find_real_name(name) if name in self.config and name not in self.unsaved: self.unsaved[name] = self.config[self._find_real_name(name)] def save(self): """ Save any outstanding items. This returns a Deferred which will errback if Tor was unhappy with anything, or callback with this TorConfig object on success. """ if not self.needs_save(): return defer.succeed(self) args = [] directories = [] for (key, value) in self.unsaved.items(): if key == 'HiddenServices': self.config['HiddenServices'] = value # using a list here because at least one unit-test # cares about order -- and conceivably order *could* # matter here, to Tor... services = list() # authenticated services get flattened into the HiddenServices list... for hs in value: if IOnionClient.providedBy(hs): parent = IOnionClient(hs).parent if parent not in services: services.append(parent) elif isinstance( hs, (EphemeralOnionService, EphemeralHiddenService)): raise ValueError( "Only filesystem based Onion services may be added" " via TorConfig.hiddenservices; ephemeral services" " must be created with 'create_onion_service'.") else: if hs not in services: services.append(hs) for hs in services: for (k, v) in hs.config_attributes(): if k == 'HiddenServiceDir': if v not in directories: directories.append(v) args.append(k) args.append(v) else: raise RuntimeError( "Trying to add hidden service with same HiddenServiceDir: %s" % v) else: args.append(k) args.append(v) continue if isinstance(value, list): for x in value: # FIXME XXX if x is not DEFAULT_VALUE: args.append(key) args.append(str(x)) else: args.append(key) args.append(value) # FIXME in future we should wait for CONF_CHANGED and # update then, right? 
real_name = self._find_real_name(key) if not isinstance(value, list) and real_name in self.parsers: value = self.parsers[real_name].parse(value) self.config[real_name] = value # FIXME might want to re-think this, but currently there's no # way to put things into a config and get them out again # nicely...unless you just don't assign a protocol if self.protocol: d = self.protocol.set_conf(*args) d.addCallback(self._save_completed) return d else: self._save_completed() return defer.succeed(self) def _save_completed(self, *args): '''internal callback''' self.__dict__['unsaved'] = {} return self def _find_real_name(self, name): keys = list(self.__dict__['parsers'].keys()) + list( self.__dict__['config'].keys()) for x in keys: if x.lower() == name.lower(): return x return name @defer.inlineCallbacks def _get_defaults(self): try: defaults_raw = yield self.protocol.get_info_raw("config/defaults") defaults = {} for line in defaults_raw.split('\n')[1:]: k, v = line.split(' ', 1) if k in defaults: if isinstance(defaults[k], list): defaults[k].append(v) else: defaults[k] = [defaults[k], v] else: defaults[k] = v except TorProtocolError: # must be a version of Tor without config/defaults defaults = dict() defer.returnValue(defaults) @defer.inlineCallbacks def _do_setup(self, data): defaults = self.__dict__['_defaults'] = yield self._get_defaults() for line in data.split('\n'): if line == "config/names=": continue (name, value) = line.split() if name in self._supports: self._supports[name] = True if name == 'HiddenServiceOptions': # set up the "special-case" hidden service stuff servicelines = yield self.protocol.get_conf_raw( 'HiddenServiceOptions') self._setup_hidden_services(servicelines) continue # there's a whole bunch of FooPortLines (where "Foo" is # "Socks", "Control", etc) and some have defaults, some # don't but they all have FooPortLines, FooPort, and # __FooPort definitions so we only "do stuff" for the # "FooPortLines" if name.endswith('PortLines'): rn = self._find_real_name(name[:-5]) self.parsers[rn] = String() # not Port() because options etc self.list_parsers.add(rn) v = yield self.protocol.get_conf(name[:-5]) v = v[name[:-5]] initial = [] if v == DEFAULT_VALUE or v == 'auto': try: initial = defaults[name[:-5]] except KeyError: default_key = '__{}'.format(name[:-5]) default = yield self.protocol.get_conf_single( default_key) if not default: initial = [] else: initial = [default] else: initial = [self.parsers[rn].parse(v)] self.config[rn] = _ListWrapper( initial, functools.partial(self.mark_unsaved, rn)) # XXX for Virtual check that it's one of the *Ports things # (because if not it should be an error) if value in ('Dependant', 'Dependent', 'Virtual'): continue # there's a thing called "Boolean+Auto" which is -1 for # auto, 0 for false and 1 for true. could be nicer if it # was called AutoBoolean or something, but... value = value.replace('+', '_') inst = None # FIXME: put parser classes in dict instead? 
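            # Look up the parser class whose name matches the type string Tor
            # reported (e.g. "Boolean", "String") and bail out loudly if we
            # don't have one for it.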
for cls in config_types: if cls.__name__ == value: inst = cls() if not inst: raise RuntimeError("Don't have a parser for: " + value) v = yield self.protocol.get_conf(name) v = v[name] rn = self._find_real_name(name) self.parsers[rn] = inst if is_list_config_type(inst.__class__): self.list_parsers.add(rn) parsed = self.parsers[rn].parse(v) if parsed == [DEFAULT_VALUE]: parsed = defaults.get(rn, []) self.config[rn] = _ListWrapper( parsed, functools.partial(self.mark_unsaved, rn)) else: if v == '' or v == DEFAULT_VALUE: parsed = self.parsers[rn].parse( defaults.get(rn, DEFAULT_VALUE)) else: parsed = self.parsers[rn].parse(v) self.config[rn] = parsed # get any ephemeral services we own, or detached services. # these are *not* _ListWrappers because we don't care if they # change, nothing in Tor's config exists for these (probably # begging the question: why are we putting them in here at all # then...?) try: ephemeral = yield self.protocol.get_info('onions/current') except Exception: self.config['EphemeralOnionServices'] = [] else: onions = [] for line in ephemeral['onions/current'].split('\n'): onion = line.strip() if onion: onions.append( EphemeralOnionService( self, ports=[], # no way to discover ports= hostname=onion, private_key=DISCARD, # we don't know it, anyway version=2, detach=False, )) self.config['EphemeralOnionServices'] = onions try: detached = yield self.protocol.get_info('onions/detached') except Exception: self.config['DetachedOnionServices'] = [] else: onions = [] for line in detached['onions/detached'].split('\n'): onion = line.strip() if onion: onions.append( EphemeralOnionService( self, ports=[], # no way to discover original ports= hostname=onion, detach=True, private_key=DISCARD, )) self.config['DetachedOnionServices'] = onions defer.returnValue(self) def _setup_hidden_services(self, servicelines): def maybe_add_hidden_service(): if directory is not None: if directory not in directories: directories.append(directory) if not auth: service = FilesystemOnionService( self, directory, ports, ver, group_read) hs.append(service) else: auth_type, clients = auth.split(' ', 1) clients = clients.split(',') if auth_type == 'basic': auth0 = AuthBasic(clients) elif auth_type == 'stealth': auth0 = AuthStealth(clients) else: raise ValueError( "Unknown auth type '{}'".format(auth_type)) parent_service = FilesystemAuthenticatedOnionService( self, directory, ports, auth0, ver, group_read) for client_name in parent_service.client_names(): hs.append(parent_service.get_client(client_name)) else: raise RuntimeError( "Trying to add hidden service with same HiddenServiceDir: %s" % directory) hs = [] directory = None directories = [] ports = [] ver = None group_read = None auth = None for line in servicelines.split('\n'): if not len(line.strip()): continue if line == 'HiddenServiceOptions': continue k, v = line.split('=') if k == 'HiddenServiceDir': maybe_add_hidden_service() directory = v _directory = directory directory = os.path.abspath(directory) if directory != _directory: warnings.warn( "Directory path: %s changed to absolute path: %s" % (_directory, directory), RuntimeWarning) ports = [] ver = None auth = None group_read = 0 elif k == 'HiddenServicePort': ports.append(v) elif k == 'HiddenServiceVersion': ver = int(v) elif k == 'HiddenServiceAuthorizeClient': if auth is not None: # definitely error, or keep going? 
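                    # a second HiddenServiceAuthorizeClient line for the same
                    # service is ambiguous, so reject it instead of silently
                    # overwriting the first one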
raise ValueError( "Multiple HiddenServiceAuthorizeClient lines for one service" ) auth = v elif k == 'HiddenServiceDirGroupReadable': group_read = int(v) else: raise RuntimeError("Can't parse HiddenServiceOptions: " + k) maybe_add_hidden_service() name = 'HiddenServices' self.config[name] = _ListWrapper( hs, functools.partial(self.mark_unsaved, name)) def config_args(self): ''' Returns an iterator of 2-tuples (config_name, value), one for each configuration option in this config. This is more-or-less an internal method, but see, e.g., launch_tor()'s implementation if you think you need to use this for something. See :meth:`txtorcon.TorConfig.create_torrc` which returns a string which is also a valid ``torrc`` file ''' everything = dict() everything.update(self.config) everything.update(self.unsaved) for (k, v) in list(everything.items()): if type(v) is _ListWrapper: if k.lower() == 'hiddenservices': for x in v: for (kk, vv) in x.config_attributes(): yield (str(kk), str(vv)) else: # FIXME actually, is this right? don't we want ALL # the values in one string?! for x in v: yield (str(k), str(x)) else: yield (str(k), str(v)) def create_torrc(self): rtn = StringIO() for (k, v) in self.config_args(): rtn.write(u'%s %s\n' % (k, v)) return rtn.getvalue()
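
# Illustrative usage sketch (an assumption for illustration, not part of the
# class above): given a bootstrapped TorConfig instance, config_args() yields
# the configuration as (key, value) string pairs and create_torrc() renders
# the same data as torrc-style text.
def _example_dump_config(config):
    # hypothetical helper, only for illustration
    for key, value in config.config_args():
        print("%s %s" % (key, value))
    print(config.create_torrc())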