def report(self):
    """Collect the report data of every population into a single DataFrame.

    Populations whose node id lookup raises ``BluepySnapError`` and populations
    returning an empty frame are skipped.

    Returns:
        pandas.DataFrame: A DataFrame containing the data from the report. The row
        indices are the different timestamps and the columns are a MultiIndex of:

            - (population_name, node_id, compartment id) for the CompartmentReport
            - (population_name, node_id) for the SomaReport
    """
    merged = pd.DataFrame()
    for population in self.frame_report.population_names:
        frames = self.frame_report[population]
        try:
            ids = frames.nodes.ids(group=self.group)
        except BluepySnapError:
            continue

        data = frames.get(group=ids, t_start=self.t_start, t_stop=self.t_stop)
        if data.empty:
            continue

        # Prefix every column key with the name of the population it comes from.
        columns = tuple(
            tuple([population] + ensure_list(column)) for column in data.columns
        )
        data.columns = pd.MultiIndex.from_tuples(columns)

        # Joining is needed in order to preserve the MultiIndex of the columns.
        if merged.empty:
            merged = data
        else:
            merged = data.join(merged, how="outer")

    by_time = merged.sort_index()
    return by_time.sort_index(axis=1)
def _circuit_mask(data, population_name, queries):
    """Handle the population and node/edge ID queries.

    The handled keys are popped from ``queries`` in place.

    Args:
        data: the data forwarded to ``_positional_mask``.
        population_name (str): name of the population the data belongs to.
        queries (dict): the queries to process.

    Returns:
        tuple: ``(queries, mask)`` with the remaining queries and the result of
        ``_positional_mask`` for the selected ids.
    """
    populations = queries.pop(POPULATION_KEY, None)
    other_population_only = (
        populations is not None
        and population_name not in set(utils.ensure_list(populations))
    )
    if other_population_only:
        ids = []
    else:
        # The edge ID key is always consumed; the node ID key wins when both exist.
        edge_ids = queries.pop(EDGE_ID_KEY, None)
        ids = queries.pop(NODE_ID_KEY, edge_ids)
    return queries, _positional_mask(data, ids)
def ids(self, group=None, limit=None, sample=None, raise_missing_property=True):
    """Edge IDs corresponding to edges ``group``.

    Args:
        group (None/int/CircuitEdgeId/CircuitEdgeIds/sequence/mapping): Which IDs will be
            returned depends on the type of the ``group`` argument:

            - ``None``: return all IDs.
            - ``int``, ``CircuitEdgeId``: return a single edge ID.
            - ``CircuitEdgeIds``: return IDs of edges of this edge population in an array.
            - ``numpy.ndarray``: used as is.
            - ``mapping``: return IDs of edges selected by the edge filter queries.
            - ``sequence``: return IDs of edges in an array.

        sample (int): If specified, randomly choose ``sample`` number of IDs from the match
            result. If the size of the sample is greater than the number of matched IDs
            then all ids are taken and shuffled.

        limit (int): If specified, return the first ``limit`` number of IDs from the match
            result. If limit is greater than the number of matched IDs all of them are
            returned.

        raise_missing_property (bool): if True, raises if a property is not listed in this
            population. Otherwise the ids are just not selected if a property is missing.

    Returns:
        numpy.array: A numpy array of IDs.

    Raises:
        BluepySnapError: if a list starting with a ``CircuitEdgeId`` contains values that
            are not ``CircuitEdgeId``.
    """
    if group is None:
        selected = self._population.select_all().flatten()
    elif isinstance(group, CircuitEdgeIds):
        selected = group.filter_population(self.name).get_ids()
    elif isinstance(group, np.ndarray):
        selected = group
    elif isinstance(group, Mapping):
        selected = self._edge_ids_by_filter(
            queries=group, raise_missing_prop=raise_missing_property
        )
    else:
        selected = utils.ensure_list(group)
        # If the first value is a CircuitEdgeId then all values must be CircuitEdgeId.
        if isinstance(first(selected, None), CircuitEdgeId):
            try:
                selected = [cid.id for cid in selected if cid.population == self.name]
            except AttributeError as e:
                raise BluepySnapError(
                    "All values from a list must be of type int or CircuitEdgeId."
                ) from e

    if sample is not None and len(selected) > 0:
        sample_size = min(sample, len(selected))
        selected = np.random.choice(selected, sample_size, replace=False)

    if limit is not None:
        selected = selected[:limit]

    return utils.ensure_ids(selected)
def _locate(self, population):
    """Return the index positions corresponding to a given population.

    Args:
        population (str): the population name you want to locate inside the MultiIndex.

    Returns:
        numpy.array: indices corresponding to the population, or an empty list when the
        lookup raises ``KeyError``.
    """
    try:
        locations = self.index.get_locs(utils.ensure_list(population))
    except KeyError:
        locations = []
    return locations
def _circuit_mask(self, queries):
    """Handle the population, node ID and node set queries.

    The population key is always popped from ``queries``; the node ID and node set
    keys are popped when the population query matches this population.

    Args:
        queries (dict): the queries to process (modified in place).

    Returns:
        tuple: ``(queries, mask)`` with the remaining queries and the positional mask
        of the selected node IDs.

    Raises:
        BluepySnapError: if the node set query is not a string.
    """
    populations = queries.pop(POPULATION_KEY, None)
    if populations is not None and self.name not in set(
            utils.ensure_list(populations)):
        # The query only targets other populations: nothing is selected here.
        node_ids = []
    else:
        node_ids = queries.pop(NODE_ID_KEY, None)
        node_set = queries.pop(NODE_SET_KEY, None)
        if node_set is not None:
            if not isinstance(node_set, six.string_types):
                raise BluepySnapError(
                    "{} is not a valid node set name.".format(node_set))
            # Only a missing node ID query means "all nodes". A truthiness test would
            # also discard node ID 0 and explicit empty selections, and it raises on
            # multi-element numpy arrays.
            if node_ids is None:
                node_ids = self._data.index.values
            node_ids = np.intersect1d(node_ids, self.ids(node_set))
    return queries, self._positional_mask(node_ids)
def _positional_mask(self, node_ids): """Positional mask for the node IDs. Args: node_ids (None/numpy.ndarray): the ids array. If None all ids are selected. Examples: if the data set contains 5 nodes: _positional_mask([0,2]) --> [True, False, True, False, False] """ if node_ids is None: return np.full(len(self._data), fill_value=True) mask = np.full(len(self._data), fill_value=False) valid_node_ids = pd.Index(utils.ensure_list(node_ids)).intersection( self._data.index) mask[valid_node_ids] = True return mask
def get(self, group=None, properties=None):
    """Node properties as a pandas Series or DataFrame.

    Args:
        group (int/CircuitNodeId/CircuitNodeIds/sequence/str/mapping/None): Which nodes will
            have their properties returned depends on the type of the ``group`` argument:

            - ``int``, ``CircuitNodeId``: return the properties of a single node.
            - ``CircuitNodeIds``: return the properties from a CircuitNodeIds.
            - ``sequence``: return the properties from a list of nodes.
            - ``str``: return the properties of nodes in a node set.
            - ``mapping``: return the properties of nodes matching a properties filter.
            - ``None``: return the properties of all nodes.

        properties (list): If specified, return only the properties in the list.
            Otherwise return all properties.

    Returns:
        value/pandas.Series/pandas.DataFrame:
            If single node ID is passed as ``group`` and single property as properties
            returns a single value. If single node ID is passed as ``group`` and list as
            property returns a pandas Series. Otherwise return a pandas DataFrame indexed
            by node IDs.

    Notes:
        The NodePopulation.property_names function will give you all the usable properties
        for the `properties` argument.
    """
    selected = self._data

    if properties is not None:
        # Every requested property is checked before the columns are selected.
        for name in utils.ensure_list(properties):
            self._check_property(name)
        selected = selected[properties]

    if group is None:
        return selected

    if isinstance(group, six.integer_types + (np.integer, )):
        self._check_id(group)
        key = group
    elif isinstance(group, CircuitNodeId):
        # A single circuit id is resolved to a single scalar id.
        key = self.ids(group)[0]
    else:
        key = self.ids(group)
    return selected.loc[key]
def get(self, group=None, properties=None):
    """Node properties as a pandas DataFrame.

    Args:
        group (CircuitNodeIds/int/sequence/str/mapping/None): Which nodes will have their
            properties returned depends on the type of the ``group`` argument:
            See :py:class:`~bluepysnap.nodes.Nodes.ids`.

        properties (str/list): If specified, return only the properties in the list.
            Otherwise return all properties.

    Returns:
        pandas.DataFrame: Return a pandas DataFrame indexed by NodeCircuitIds containing the
            properties from ``properties``.

    Raises:
        BluepySnapError: if a requested property is not part of ``self.property_names``.

    Notes:
        The NodePopulation.property_names function will give you all the usable properties
        for the `properties` argument.
    """
    ids = self.ids(group)

    if properties is None:
        properties = self.property_names
    properties = utils.ensure_list(properties)

    requested = set(properties)
    unknown_props = requested - self.property_names
    if unknown_props:
        raise BluepySnapError(
            "Unknown properties required: {}".format(unknown_props))

    table = pd.DataFrame(index=ids.index, columns=properties)
    for name, pop in self.items():
        global_pop_ids = ids.filter_population(name)
        pop_ids = global_pop_ids.get_ids()
        # The indices from the population and from this object differ, and the
        # properties have different types, so the table is filled one column at a
        # time from raw numpy values.
        for prop in requested & pop.property_names:
            values = pop.get(pop_ids, properties=prop).to_numpy()
            table.loc[global_pop_ids.index, prop] = values
    return table.sort_index()
def get(self, group=None, properties=None):
    """Return the properties of the NetworkObject.

    Args:
        group: forwarded to ``self.ids`` to select the ids.
        properties: the property name(s) to return, kept in the given order as columns.

    Returns:
        pandas.DataFrame: the requested properties indexed by the index of the selected
        ids, sorted by index.

    Raises:
        BluepySnapError: if a requested property is not part of ``self.property_names``.
    """
    ids = self.ids(group)

    # The list itself is kept (not converted to a set) to preserve the column order.
    properties = utils.ensure_list(properties)
    requested = set(properties)

    unknown_props = requested - self.property_names
    if unknown_props:
        raise BluepySnapError(
            f"Unknown properties required: {unknown_props}")

    table = pd.DataFrame(index=ids.index, columns=properties)
    for name, pop in self.items():
        global_pop_ids = ids.filter_population(name)
        pop_ids = global_pop_ids.get_ids()
        # The indices from the population and from this object differ, and the
        # properties have different types, so the table is filled one column at a
        # time from raw numpy values.
        for prop in requested & pop.property_names:
            values = pop.get(pop_ids, prop).to_numpy()
            table.loc[global_pop_ids.index, prop] = values
    return table.sort_index()
def ids(self, group=None, limit=None, sample=None, raise_missing_property=True):
    """Node IDs corresponding to node ``group``.

    Args:
        group (int/CircuitNodeId/CircuitNodeIds/sequence/str/mapping/None): Which IDs will be
            returned depends on the type of the ``group`` argument:

            - ``int``, ``CircuitNodeId``: return a single node ID if it belongs to the circuit.
            - ``CircuitNodeIds``: return IDs of nodes in an array.
            - ``sequence``: return IDs of nodes in an array.
            - ``str``: return IDs of nodes in a node set.
            - ``mapping``: return IDs of nodes matching a properties filter.
            - ``None``: return all node IDs.

            If ``group`` is a ``sequence``, the order of results is preserved.
            Otherwise the result is sorted and contains no duplicates.

        sample (int): If specified, randomly choose ``sample`` number of
            IDs from the match result. If the size of the sample is greater than
            the number of matched IDs then all ids are taken. Sampled results are
            always sorted and contain no duplicates.

        limit (int): If specified, return the first ``limit`` number of
            IDs from the match result. If limit is greater than the number of
            matched IDs all of them are returned.

        raise_missing_property (bool): if True, raises if a property is not listed in this
            population. Otherwise the ids are just not selected if a property is missing.

    Returns:
        numpy.array: A numpy array of IDs.

    Raises:
        BluepySnapError: if a list starting with a ``CircuitNodeId`` contains values that
            are not ``CircuitNodeId``.

    Examples:
        The available group parameter values:

        >>> nodes.ids(group=None)  #  returns all IDs
        >>> nodes.ids(group={})  #  returns all IDs
        >>> nodes.ids(group=1)  #  returns the single ID if present in population
        >>> #  returns the single ID if present in population and the circuit id population
        >>> #  corresponds to nodes.name
        >>> nodes.ids(group=CircuitNodeId('pop', 1))
        >>> nodes.ids(group=[1,2,3])  # returns list of IDs if all present in population
        >>> #  returns list of IDs if all present in population
        >>> nodes.ids(group=CircuitNodeIds.from_dict({"pop": [0, 1,2]}))
        >>> nodes.ids(group="node_set_name")  # returns list of IDs matching node set
        >>> nodes.ids(group={ Node.LAYER: 2})  # returns list of IDs matching layer==2
        >>> nodes.ids(group={ Node.LAYER: [2, 3]})  # returns list of IDs with layer in [2,3]
        >>> nodes.ids(group={ Node.X: (0, 1)})  # returns list of IDs with 0 < x < 1
        >>> # returns list of IDs matching one of the queries inside the 'or' list
        >>> nodes.ids(group={'$or': [{ Node.LAYER: [2, 3]},
        >>>                          { Node.X: (0, 1), Node.MTYPE: 'L1_SLAC' }]})
        >>> # returns list of IDs matching all the queries inside the 'and' list
        >>> nodes.ids(group={'$and': [{ Node.LAYER: [2, 3]},
        >>>                           { Node.X: (0, 1), Node.MTYPE: 'L1_SLAC' }]})
    """
    # pylint: disable=too-many-branches,import-outside-toplevel
    # The ``collections.Mapping`` / ``collections.Sequence`` aliases were removed in
    # Python 3.10: the abstract base classes must come from ``collections.abc``.
    from collections.abc import Mapping, Sequence

    preserve_order = False

    # Node set names and circuit ids are first resolved to plain ids of this population.
    if isinstance(group, str):
        group = self._get_node_set(group)
    elif isinstance(group, CircuitNodeIds):
        group = group.filter_population(self.name).get_ids()

    if group is None:
        result = self._data.index.values
    elif isinstance(group, Mapping):
        result = self._node_ids_by_filter(
            queries=group, raise_missing_prop=raise_missing_property)
    elif isinstance(group, np.ndarray):
        result = group
        self._check_ids(result)
        preserve_order = True
    else:
        result = utils.ensure_list(group)
        # If the first value is a CircuitNodeId then all values must be CircuitNodeId.
        if isinstance(next(iter(result), None), CircuitNodeId):
            try:
                result = [cid.id for cid in result if cid.population == self.name]
            except AttributeError as e:
                raise BluepySnapError(
                    "All values from a list must be of type int or CircuitNodeId."
                ) from e
        self._check_ids(result)
        preserve_order = isinstance(group, Sequence)

    if sample is not None:
        if len(result) > 0:
            # The sample size is clamped: sampling without replacement cannot draw
            # more values than available.
            result = np.random.choice(result, min(sample, len(result)), replace=False)
        preserve_order = False

    if limit is not None:
        result = result[:limit]

    result = np.array(result, dtype=np.int64)
    return result if preserve_order else np.unique(result)
def test_ensure_list():
    """Check ``ensure_list`` on scalar, list, iterator, tuple and string inputs."""
    cases = [
        (1, [1]),
        ([1], [1]),
        (iter([1]), [1]),
        ((2, 1), [2, 1]),
        ('abc', ['abc']),
    ]
    for value, expected in cases:
        assert test_module.ensure_list(value) == expected
def get(self, group=None, properties=None):
    """Node properties as a pandas Series or DataFrame.

    Args:
        group (int/CircuitNodeId/CircuitNodeIds/sequence/str/mapping/None): Which nodes will
            have their properties returned depends on the type of the ``group`` argument:

            - ``int``, ``CircuitNodeId``: return the properties of a single node.
            - ``CircuitNodeIds``: return the properties from a CircuitNodeIds.
            - ``sequence``: return the properties from a list of nodes.
            - ``str``: return the properties of nodes in a node set.
            - ``mapping``: return the properties of nodes matching a properties filter.
            - ``None``: return the properties of all nodes.

        properties (list|str|None): If specified, return only the properties in the list.
            Otherwise return all properties.

    Returns:
        value/pandas.Series/pandas.DataFrame:
            The type of the returned object depends on the type of the input parameters,
            see the Examples for an explanation of the different cases.

    Notes:
        The NodePopulation.property_names function will give you all the usable properties
        for the `properties` argument.

    Examples:
        Considering a node population composed by 3 nodes (0, 1, 2) and 12 properties,
        the following examples show the types of the returned objects.

        - If ``group`` is a single node ID and ``properties`` a single property,
          returns a single scalar value.

            >>> result = my_node_population.get(group=0, properties=Cell.MTYPE)
            >>> type(result)
            str

        - If ``group`` is a single node ID and ``properties`` a list or None,
          returns a pandas Series indexed by the properties.

            >>> result = my_node_population.get(group=0)
            >>> type(result), result.shape
            (pandas.core.series.Series, (12,))

            >>> result = my_node_population.get(group=0, properties=[Cell.MTYPE])
            >>> type(result), result.shape
            (pandas.core.series.Series, (1,))

        - If ``group`` is anything other than a single node ID, and ``properties`` is a single
          property, returns a pandas Series indexed by node IDs.

            >>> result = my_node_population.get(properties=Cell.MTYPE)
            >>> type(result), result.shape
            (pandas.core.series.Series, (3,))

            >>> result = my_node_population.get(group=[0], properties=Cell.MTYPE)
            >>> type(result), result.shape
            (pandas.core.series.Series, (1,))

        - In all the other cases, returns a pandas DataFrame indexed by node IDs.

            >>> result = my_node_population.get()
            >>> type(result), result.shape
            (pandas.core.frame.DataFrame, (3, 12))

            >>> result = my_node_population.get(group=[0])
            >>> type(result), result.shape
            (pandas.core.frame.DataFrame, (1, 12))

            >>> result = my_node_population.get(properties=[Cell.MTYPE])
            >>> type(result), result.shape
            (pandas.core.frame.DataFrame, (3, 1))

            >>> result = my_node_population.get(group=[0], properties=[Cell.MTYPE])
            >>> type(result), result.shape
            (pandas.core.frame.DataFrame, (1, 1))
    """
    selected = self._data

    if properties is not None:
        # Every requested property is checked before the columns are selected.
        for name in utils.ensure_list(properties):
            self._check_property(name)
        selected = selected[properties]

    if group is None:
        return selected

    if isinstance(group, (int, np.integer)):
        self._check_id(group)
        key = group
    elif isinstance(group, CircuitNodeId):
        # A single circuit id is resolved to a single scalar id.
        key = self.ids(group)[0]
    else:
        key = self.ids(group)
    return selected.loc[key]
def ids(self, group=None, limit=None, sample=None):
    """Node IDs corresponding to node ``group``.

    Args:
        group (int/sequence/str/mapping/None): Which IDs will be
            returned depends on the type of the ``group`` argument:

            - ``int``: return a single node ID if it belongs to the circuit.
            - ``sequence``: return IDs of nodes in an array.
            - ``str``: return IDs of nodes in a node set.
            - ``mapping``: return IDs of nodes matching a properties filter.
            - ``None``: return all node IDs.

            If ``group`` is a ``sequence``, the order of results is preserved.
            Otherwise the result is sorted and contains no duplicates.

        sample (int): If specified, randomly choose ``sample`` number of
            IDs from the match result. If ``sample`` is greater than the number of
            matched IDs, all of them are taken. Sampled results are always sorted
            and contain no duplicates.

        limit (int): If specified, return the first ``limit`` number of
            IDs from the match result.

    Returns:
        numpy.array: A numpy array of IDs.

    Examples:
        The available group parameter values:

        >>> nodes.ids(group=None)  #  returns all IDs
        >>> nodes.ids(group={})  #  returns all IDs
        >>> nodes.ids(group=1)  #  returns the single ID if present in population
        >>> nodes.ids(group=[1,2,3])  # returns list of IDs if all present in population
        >>> nodes.ids(group="node_set_name")  # returns list of IDs matching node set
        >>> nodes.ids(group={ Node.LAYER: 2})  # returns list of IDs matching layer==2
        >>> nodes.ids(group={ Node.LAYER: [2, 3]})  # returns list of IDs with layer in [2,3]
        >>> nodes.ids(group={ Node.X: (0, 1)})  # returns list of IDs with 0 < x < 1
        >>> # returns list of IDs matching one of the queries inside the 'or' list
        >>> nodes.ids(group={'$or': [{ Node.LAYER: [2, 3]},
        >>>                          { Node.X: (0, 1), Node.MTYPE: 'L1_SLAC' }]})
        >>> # returns list of IDs matching all the queries inside the 'and' list
        >>> nodes.ids(group={'$and': [{ Node.LAYER: [2, 3]},
        >>>                           { Node.X: (0, 1), Node.MTYPE: 'L1_SLAC' }]})
    """
    # pylint: disable=import-outside-toplevel
    # The ``collections.Mapping`` / ``collections.Sequence`` aliases were removed in
    # Python 3.10: the abstract base classes must come from ``collections.abc``.
    from collections.abc import Mapping, Sequence

    preserve_order = False

    # A node set name is first resolved to the ids it contains.
    if isinstance(group, str):
        group = self._get_node_set(group)

    if group is None:
        result = self._data.index.values
    elif isinstance(group, Mapping):
        result = self._node_ids_by_filter(queries=group)
    elif isinstance(group, np.ndarray):
        result = group
        self._check_ids(result)
        preserve_order = True
    else:
        result = utils.ensure_list(group)
        self._check_ids(result)
        preserve_order = isinstance(group, Sequence)

    if sample is not None:
        if len(result) > 0:
            # Sampling without replacement raises ValueError when asked for more
            # values than available, so the sample size is clamped.
            result = np.random.choice(result, min(sample, len(result)), replace=False)
        preserve_order = False

    if limit is not None:
        result = result[:limit]

    result = np.array(result, dtype=np.int64)
    return result if preserve_order else np.unique(result)
def test_ensure_list():
    """Check ``ensure_list`` on scalar, list, iterator, tuple and string inputs."""
    cases = [
        (1, [1]),
        ([1], [1]),
        (iter([1]), [1]),
        ((2, 1), [2, 1]),
        ("abc", ["abc"]),
    ]
    for value, expected in cases:
        assert test_module.ensure_list(value) == expected