def json2neuron(s, **kwargs):
    """ Load neuron from JSON string.

    Parameters
    ----------
    s :         str
                JSON-formatted string.
    **kwargs
                Parameters passed to ``json.loads()`` and
                ``pandas.read_json()``.

    Returns
    -------
    :class:`~pymaid.CatmaidNeuronList`

    See Also
    --------
    :func:`~pymaid.neuron2json`
                Turn neuron into json.

    """
    if not isinstance(s, str):
        raise TypeError('Need str, got "{0}"'.format(type(s)))

    data = json.loads(s, **kwargs)

    nl = core.CatmaidNeuronList([])

    for n in data:
        # Make sure we have all we need
        REQUIRED = ['skeleton_id']

        missing = [p for p in REQUIRED if p not in n]

        if missing:
            raise ValueError('Missing data: {0}'.format(','.join(missing)))

        cn = core.CatmaidNeuron(int(n['skeleton_id']))

        if 'nodes' in n:
            cn.nodes = pd.read_json(n['nodes'])
            cn.connectors = pd.read_json(n['connectors'])

        for key in n:
            if key in ['skeleton_id', 'nodes', 'connectors']:
                continue
            setattr(cn, key, n[key])

        nl += cn

    return nl

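
# Round-trip usage sketch for neuron2json/json2neuron (assumes an
# authenticated CatmaidInstance and skeleton ID 16 - both placeholders,
# not part of this module):
#
# >>> nl = pymaid.get_neuron(16)
# >>> js = pymaid.neuron2json(nl)
# >>> nl2 = pymaid.json2neuron(js)
# >>> nl2[0].skeleton_id == nl[0].skeleton_id
# True
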
def neuron2py(neuron, remote_instance=None):
    """ Converts an rcatmaid ``neuron`` or ``neuronlist`` object to a PyMaid
    :class:`~pymaid.CatmaidNeuron`/:class:`~pymaid.CatmaidNeuronList`.

    Notes
    -----
    Node creator and confidence are not included in R's neuron/neuronlist
    and will be imported as ``None``.

    Parameters
    ----------
    neuron :            R neuron | R neuronlist
                        Neuron to convert to Python.
    remote_instance :   CATMAID instance, optional
                        Provide if you want neuron names to be updated from
                        the server.

    Returns
    -------
    pymaid CatmaidNeuronList

    """
    if 'rpy2' in str(type(neuron)):
        if cl(neuron)[0] == 'neuronlist':
            neuron_list = pd.DataFrame(
                data=[[data2py(e) for e in n] for n in neuron],
                columns=list(neuron[0].names))
            if 'df' in neuron.slots:
                neuron_list['name'] = neuron.slots['df'][2]
            else:
                neuron_list['name'] = ['NA'] * neuron_list.shape[0]
        elif cl(neuron)[0] == 'catmaidneuron' or cl(neuron)[0] == 'neuron':
            neuron_list = pd.DataFrame(data=[[e for e in neuron]],
                                       columns=neuron.names)
            neuron_list = neuron_list.applymap(data2py)
            neuron_list['name'] = ['NA']
        neuron = neuron_list

    remote_instance = utils._eval_remote_instance(remote_instance,
                                                  raise_error=False)

    # Nat functions may return neuron objects that have ONLY nodes - no
    # connectors, skeleton_id, name or tags!
    if 'skid' in neuron and remote_instance:
        neuron_names = fetch.get_names([n[0] for n in neuron.skid.tolist()],
                                       remote_instance)
    elif 'skid' in neuron and not remote_instance:
        neuron_names = None
        logger.info('Please provide a remote instance if you want to add '
                    'neuron names.')
    else:
        logger.warning('Neuron has only nodes (no name, skid, connectors '
                       'or tags).')

    data = []
    for i in range(neuron.shape[0]):
        # Note that radius is divided by 2 -> this is because in rcatmaid
        # the original radius is doubled for some reason
        nodes = pd.DataFrame([[no.PointNo, no.Parent, None, no.X, no.Y,
                               no.Z, no.W / 2, None]
                              for no in neuron.loc[i, 'd'].itertuples()],
                             dtype=object)
        nodes.columns = ['treenode_id', 'parent_id', 'creator_id',
                         'x', 'y', 'z', 'radius', 'confidence']
        nodes.loc[nodes.parent_id == -1, 'parent_id'] = None

        if 'connectors' in neuron:
            connectors = pd.DataFrame([[cn.treenode_id, cn.connector_id,
                                        cn.prepost, cn.x, cn.y, cn.z]
                                       for cn in neuron.loc[i, 'connectors'].itertuples()],
                                      dtype=object)
            connectors.columns = ['treenode_id', 'connector_id',
                                  'relation', 'x', 'y', 'z']
        else:
            connectors = pd.DataFrame(columns=['treenode_id', 'connector_id',
                                               'relation', 'x', 'y', 'z'])

        if 'skid' in neuron:
            skid = neuron.loc[i, 'skid'][0]
        else:
            skid = 'NA'

        data.append([skid, nodes, connectors])

    df = pd.DataFrame(data=data,
                      columns=['skeleton_id', 'nodes', 'connectors'],
                      dtype=object)
    df['igraph'] = None

    if 'tags' in neuron:
        df['tags'] = neuron.tags.tolist()
    else:
        df['tags'] = [{} for n in df.skeleton_id.tolist()]

    if 'skid' in neuron and neuron_names is not None:
        df['neuron_name'] = [neuron_names[str(n)]
                             for n in df.skeleton_id.tolist()]
    else:
        df['neuron_name'] = ['NA' for n in df.skeleton_id.tolist()]

    return core.CatmaidNeuronList(df, remote_instance=remote_instance)

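
# Usage sketch (assumes a working rpy2 setup with the R packages `nat` and
# `catmaid` installed and configured; `rm` is a placeholder for a
# CatmaidInstance - none of these names are provided by this module):
#
# >>> from rpy2.robjects.packages import importr
# >>> catmaid = importr('catmaid')
# >>> rn = catmaid.read_neurons_catmaid(16)
# >>> n = neuron2py(rn, remote_instance=rm)
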
def get_team_contributions(teams, neurons=None, remote_instance=None):
    """ Get contributions by teams: nodes, reviews, connectors, time invested.

    Notes
    -----
     1. Time calculation uses defaults from :func:`pymaid.get_time_invested`.
     2. ``total_reviews`` > ``total_nodes`` is possible if nodes have been
        reviewed multiple times by different users. Similarly,
        ``total_reviews`` = ``total_nodes`` does not imply that the neuron
        is fully reviewed!

    Parameters
    ----------
    teams :             dict
                        Teams to group contributions for. Users must be
                        logins. Format can be either:

                          1. Simple user assignments. For example::

                              {'teamA': ['user1', 'user2'],
                               'teamB': ['user3'], ...}

                          2. Users with start and end dates. Start and end
                             date must be either ``datetime.date`` or a
                             single ``pandas.date_range`` object. For
                             example::

                               {'team1': {
                                       'user1': (datetime.date(2017, 1, 1),
                                                 datetime.date(2018, 1, 1)),
                                       'user2': (datetime.date(2016, 6, 1),
                                                 datetime.date(2017, 1, 1))
                                         },
                                'team2': {
                                       'user3': pandas.date_range('2017-1-1',
                                                                  '2018-1-1'),
                                         }}

                        Mixing both styles is permissible. For the second
                        style, use e.g. ``'user1': None`` for no date
                        restrictions on that user.
    neurons :           skeleton ID(s) | CatmaidNeuron/List, optional
                        Restrict check to given set of neurons. If
                        CatmaidNeuron/List, will use these neurons'
                        nodes/connectors. Use to subset contributions
                        e.g. to a given neuropil by pruning neurons before
                        passing them to this function.
    remote_instance :   Catmaid Instance, optional
                        Either pass explicitly or define globally.

    Returns
    -------
    pandas.DataFrame
        DataFrame in which each row represents a neuron. Example for two
        teams, ``teamA`` and ``teamB``:

        >>> df
           skeleton_id  total_nodes  teamA_nodes  teamB_nodes  ...
        0
        1
           total_reviews  teamA_reviews  teamB_reviews  ...
        0
        1
           total_connectors  teamA_connectors  teamB_connectors  ...
        0
        1
           total_time  teamA_time  teamB_time
        0
        1

    Examples
    --------
    >>> from datetime import date
    >>> import pandas as pd
    >>> teams = {'teamA': ['user1', 'user2'],
    ...          'teamB': {'user3': None,
    ...                    'user4': (date(2017, 1, 1), date(2018, 1, 1))},
    ...          'teamC': {'user5': pd.date_range('2015-1-1', '2018-1-1')}}
    >>> stats = pymaid.get_team_contributions(teams)

    See Also
    --------
    :func:`~pymaid.get_contributor_statistics`
                        Gives you more basic info on neurons of interest
                        such as total reconstruction/review time.
    :func:`~pymaid.get_time_invested`
                        Time invested by individual users. Gives you more
                        control over how time is calculated.

    """
    remote_instance = utils._eval_remote_instance(remote_instance)

    # Prepare teams
    if not isinstance(teams, dict):
        raise TypeError('Expected teams of type dict, got '
                        '{}'.format(type(teams)))

    beginning_of_time = datetime.date(1900, 1, 1)
    today = datetime.date.today()
    all_time = pd.date_range(beginning_of_time, today)

    for t in teams:
        if isinstance(teams[t], list):
            teams[t] = {u: all_time for u in teams[t]}
        elif isinstance(teams[t], dict):
            for u in teams[t]:
                if isinstance(teams[t][u], type(None)):
                    teams[t][u] = all_time
                elif isinstance(teams[t][u], (tuple, list)):
                    try:
                        teams[t][u] = pd.date_range(*teams[t][u])
                    except BaseException:
                        raise Exception('Error converting "{}" to '
                                        'pandas.date_range'.format(teams[t][u]))
                elif isinstance(teams[t][u],
                                pd.core.indexes.datetimes.DatetimeIndex):
                    pass
                else:
                    raise TypeError('Expected user dates to be either None, '
                                    'tuple of datetimes or pandas.date_range, '
                                    'got {}'.format(type(teams[t][u])))
        else:
            raise TypeError('Expected teams to be either lists or dicts of '
                            'users, got {}'.format(type(teams[t])))

    # Get all users
    all_users = [u for t in teams for u in teams[t]]

    # Prepare neurons - download if necessary
    if not isinstance(neurons, type(None)):
        if isinstance(neurons, core.CatmaidNeuron):
            neurons = core.CatmaidNeuronList(neurons)
        elif isinstance(neurons, core.CatmaidNeuronList):
            pass
        else:
            neurons = fetch.get_neurons(neurons)
    else:
        all_dates = [d.date() for t in teams for u in teams[t]
                     for d in teams[t][u]]
        neurons = fetch.find_neurons(users=all_users,
                                     from_date=min(all_dates),
                                     to_date=max(all_dates))
        neurons.get_skeletons()

    # Get user list
    user_list = fetch.get_user_list(remote_instance).set_index('login')

    for u in all_users:
        if u not in user_list.index:
            raise ValueError('User "{}" not found in user list'.format(u))

    # Get all node details
    all_node_details = fetch.get_node_details(neurons,
                                              remote_instance=remote_instance)

    # Get connector links
    link_details = fetch.get_connector_links(neurons)

    # link_details contains all links. We have to subset this to existing
    # connectors in case the input neurons have been pruned
    link_details = link_details[link_details.connector_id.isin(
        neurons.connectors.connector_id.values)]

    interval = 3
    bin_width = '%iMin' % interval
    minimum_actions = 10 * interval
    stats = []
    for n in config.tqdm(neurons, desc='Processing',
                         disable=config.pbar_hide, leave=config.pbar_leave):
        # Get node details
        tn_ids = n.nodes.treenode_id.values.astype(str)
        cn_ids = n.connectors.connector_id.values.astype(str)

        current_status = config.pbar_hide
        config.pbar_hide = True
        node_details = all_node_details[all_node_details.node_id.isin(
            np.append(tn_ids, cn_ids))]
        config.pbar_hide = current_status

        # Extract node creation
        node_creation = node_details.loc[node_details.node_id.isin(tn_ids),
                                         ['creator', 'creation_time']].values
        node_creation = np.c_[node_creation,
                              ['node_creation'] * node_creation.shape[0]]

        # Extract connector creation
        cn_creation = node_details.loc[node_details.node_id.isin(cn_ids),
                                       ['creator', 'creation_time']].values
        cn_creation = np.c_[cn_creation,
                            ['cn_creation'] * cn_creation.shape[0]]

        # Extract edition times (treenodes + connectors)
        node_edits = node_details.loc[:, ['editor', 'edition_time']].values
        node_edits = np.c_[node_edits, ['editor'] * node_edits.shape[0]]

        # Link creation
        link_creation = link_details.loc[link_details.connector_id.isin(cn_ids),
                                         ['creator_id', 'creation_time']].values
        link_creation = np.c_[link_creation,
                              ['link_creation'] * link_creation.shape[0]]

        # Extract review times
        reviewers = [u for l in node_details.reviewers.values for u in l]
        timestamps = [ts for l in node_details.review_times.values for ts in l]
        node_review = np.c_[reviewers, timestamps,
                            ['review'] * len(reviewers)]

        # Merge all timestamps (ignore edits for now) to get time_invested
        all_ts = pd.DataFrame(np.vstack([node_creation,
                                         node_review,
                                         cn_creation,
                                         link_creation,
                                         node_edits]),
                              columns=['user', 'timestamp', 'type'])

        # Add column with just the date and make it the index
        all_ts['date'] = [v.date()
                          for v in all_ts.timestamp.astype(datetime.date).values]
        all_ts.index = pd.to_datetime(all_ts.date)

        # Fill in teams for each timestamp based on user + date
        all_ts['team'] = None
        for t in teams:
            for u in teams[t]:
                # Assign all timestamps by this user in the right time
                # window to this team
                existing_dates = (teams[t][u] & all_ts.index).unique()
                ss = (all_ts.index.isin(existing_dates)) & \
                     (all_ts.user.values == user_list.loc[u, 'id'])
                all_ts.loc[ss, 'team'] = t

        # Get total time spent on this neuron
        total_time = sum(all_ts.timestamp.to_frame().set_index(
            'timestamp', drop=False).groupby(pd.Grouper(
                freq=bin_width)).count().values >= minimum_actions)[0] * interval

        this_neuron = [n.skeleton_id, n.n_nodes, n.n_connectors,
                       node_review.shape[0], total_time]
        # Go over the teams and collect values
        for t in teams:
            # Subset to team
            this_team = all_ts[all_ts.team == t]
            if this_team.shape[0] > 0:
                team_time = sum(this_team.timestamp.to_frame().set_index(
                    'timestamp', drop=False).groupby(pd.Grouper(
                        freq=bin_width)).count().values >= minimum_actions)[0] * interval
                team_nodes = this_team[this_team['type'] == 'node_creation'].shape[0]
                team_cn = this_team[this_team['type'] == 'cn_creation'].shape[0]
                team_rev = this_team[this_team['type'] == 'review'].shape[0]
            else:
                team_nodes = team_cn = team_rev = team_time = 0

            this_neuron += [team_nodes, team_cn, team_rev, team_time]

        stats.append(this_neuron)

    cols = ['skeleton_id', 'total_nodes', 'total_connectors',
            'total_reviews', 'total_time']

    for t in teams:
        for s in ['nodes', 'connectors', 'reviews', 'time']:
            cols += ['{}_{}'.format(t, s)]

    stats = pd.DataFrame(stats, columns=cols)

    cols_ordered = ['skeleton_id'] + ['{}_{}'.format(t, v)
                                      for v in ['nodes', 'connectors',
                                                'reviews', 'time']
                                      for t in ['total'] + list(teams)]
    stats = stats[cols_ordered]

    return stats

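
# Downstream usage sketch (assumes `teams` as in the docstring example and
# a globally defined CatmaidInstance; column names follow the Returns
# section above):
#
# >>> stats = pymaid.get_team_contributions(teams)
# >>> # Fraction of nodes contributed by teamA, per neuron
# >>> frac = stats.teamA_nodes / stats.total_nodes
# >>> frac.mean()
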
def get_time_invested(x, remote_instance=None, minimum_actions=10,
                      treenodes=True, connectors=True, mode='SUM',
                      max_inactive_time=3, start_date=None, end_date=None):
    """ Takes a list of neurons and calculates the time individual users
    have spent working on this set of neurons.

    Parameters
    ----------
    x
                        Which neurons to check. Can be either:

                        1. skeleton IDs (int or str)
                        2. neuron name (str, must be exact match)
                        3. annotation: e.g. 'annotation:PN right'
                        4. CatmaidNeuron or CatmaidNeuronList object

                        If you pass a CatmaidNeuron/List, its data is used
                        to calculate time invested. You can exploit this to
                        get time invested into a given compartment of a
                        neuron, e.g. by pruning it to a volume.
    remote_instance :   CatmaidInstance, optional
                        Either pass explicitly or define globally.
    minimum_actions :   int, optional
                        Minimum number of actions per minute to be counted
                        as active.
    treenodes :         bool, optional
                        If False, treenodes will not be taken into account.
    connectors :        bool, optional
                        If False, connectors will not be taken into account.
    mode :              'SUM' | 'OVER_TIME' | 'ACTIONS', optional
                        (1) 'SUM' will return total time invested (in
                            minutes) per user.
                        (2) 'OVER_TIME' will return minutes invested/day
                            over time.
                        (3) 'ACTIONS' will return actions (nodes/connectors
                            placed/edited) per day.
    max_inactive_time : int, optional
                        Maximal time inactive in minutes.
    start_date :        None | tuple | datetime.date, optional
    end_date :          None | tuple | datetime.date, optional
                        Restricts time invested to window. Applies to
                        creation but not edition time!

    Returns
    -------
    pandas.DataFrame
        If ``mode='SUM'``, values represent minutes invested:

        >>> df
               total  creation  edition  review
        user1
        user2

        If ``mode='OVER_TIME'`` or ``mode='ACTIONS'``:

        >>> df
               date1  date2  date3  ...
        user1
        user2

        For ``OVER_TIME``, values represent minutes invested on that day.
        For ``ACTIONS``, values represent actions (creation, edition,
        review) on that day.

    Important
    ---------
    Creation/Edition/Review times can overlap! This is why total time
    spent is not just creation + edition + review.

    Please note that this does currently not take placement of
    pre-/postsynaptic nodes into account!

    Be aware of the ``minimum_actions`` parameter: at low settings even a
    single action (e.g. connecting a node) will add considerably to time
    invested. To keep total reconstruction time comparable to what CATMAID
    calculates, you should consider about 10 actions/minute (= a click
    every 6 seconds) and a ``max_inactive_time`` of 3 mins.

    CATMAID gives reconstruction time across all users. Here, we calculate
    the time spent tracing for individuals. This may lead to a discrepancy
    between the sum of time invested over all users from this function and
    CATMAID's reconstruction time.

    Examples
    --------
    Plot pie chart of contributions per user using Plotly. This example
    assumes that you have already imported and set up pymaid.

    >>> import plotly
    >>> stats = pymaid.get_time_invested(skids, remote_instance)
    >>> # Use plotly to generate pie chart
    >>> fig = {"data": [{"values": stats.total.tolist(),
    ...                  "labels": stats.index.tolist(),
    ...                  "type": "pie"}]}
    >>> plotly.offline.plot(fig)

    Plot reconstruction efforts over time:

    >>> stats = pymaid.get_time_invested(skids, mode='OVER_TIME')
    >>> # Plot time invested over time
    >>> stats.T.plot()
    >>> # Plot cumulative time invested over time
    >>> stats.T.cumsum(axis=0).plot()
    >>> # Filter for major contributors
    >>> stats[stats.sum(axis=1) > 20].T.cumsum(axis=0).plot()

    """

    def _extract_timestamps(ts, desc='Calc'):
        grouped = ts.set_index('timestamp', drop=False).groupby(
            ['user', pd.Grouper(freq=bin_width)]).count() >= minimum_actions
        temp_stats = {}
        for u in config.tqdm(set(ts.user.unique()) & set(relevant_users),
                             desc=desc, disable=config.pbar_hide,
                             leave=False):
            temp_stats[u] = sum(grouped.loc[u].values)[0] * interval
        return temp_stats

    if mode not in ['SUM', 'OVER_TIME', 'ACTIONS']:
        raise ValueError('Unknown mode "%s"' % str(mode))

    remote_instance = utils._eval_remote_instance(remote_instance)

    skids = utils.eval_skids(x, remote_instance)

    # Maximal inactive time is simply translated into binning -
    # we need this later for pd.Grouper() anyway
    interval = max_inactive_time
    bin_width = '%iMin' % interval

    # Update minimum_actions to reflect actions/interval instead of
    # actions/minute
    minimum_actions *= interval

    user_list = fetch.get_user_list(remote_instance).set_index('id')

    if not isinstance(x, (core.CatmaidNeuron, core.CatmaidNeuronList)):
        x = fetch.get_neuron(skids, remote_instance=remote_instance)

    if isinstance(x, core.CatmaidNeuron):
        skdata = core.CatmaidNeuronList(x)
    elif isinstance(x, core.CatmaidNeuronList):
        skdata = x

    if not isinstance(end_date, (datetime.date, type(None))):
        end_date = datetime.date(*end_date)
    if not isinstance(start_date, (datetime.date, type(None))):
        start_date = datetime.date(*start_date)

    # Extract connector and node IDs
    node_ids = []
    connector_ids = []
    for n in skdata.itertuples():
        if treenodes:
            node_ids += n.nodes.treenode_id.tolist()
        if connectors:
            connector_ids += n.connectors.connector_id.tolist()

    # Get node details
    node_details = fetch.get_node_details(node_ids + connector_ids,
                                          remote_instance=remote_instance)

    # Get details for links
    link_details = fetch.get_connector_links(skdata)

    # link_details contains all links. We have to subset this to existing
    # connectors in case the input neurons have been pruned
    link_details = link_details[link_details.connector_id.isin(connector_ids)]

    # Remove timestamps outside of date range (if provided)
    if start_date:
        node_details = node_details[node_details.creation_time >=
                                    np.datetime64(start_date)]
        link_details = link_details[link_details.creation_time >=
                                    np.datetime64(start_date)]
    if end_date:
        node_details = node_details[node_details.creation_time <=
                                    np.datetime64(end_date)]
        link_details = link_details[link_details.creation_time <=
                                    np.datetime64(end_date)]

    # Dataframe for creation (i.e. the actual generation of the nodes)
    creation_timestamps = np.append(node_details[['creator',
                                                  'creation_time']].values,
                                    link_details[['creator_id',
                                                  'creation_time']].values,
                                    axis=0)
    creation_timestamps = pd.DataFrame(creation_timestamps,
                                       columns=['user', 'timestamp'])

    # Dataframe for edition times - can't use links as there is no editor
    edition_timestamps = node_details[['editor', 'edition_time']].copy()
    edition_timestamps.columns = ['user', 'timestamp']

    # Generate dataframe for reviews
    reviewers = [u for l in node_details.reviewers.values for u in l]
    timestamps = [ts for l in node_details.review_times.values for ts in l]
    review_timestamps = pd.DataFrame([[u, ts] for u, ts in zip(
        reviewers, timestamps)], columns=['user', 'timestamp'])

    # Merge all timestamps
    all_timestamps = pd.concat([creation_timestamps,
                                edition_timestamps,
                                review_timestamps],
                               axis=0)

    all_timestamps.sort_values('timestamp', inplace=True)

    relevant_users = all_timestamps.groupby('user').count()
    relevant_users = relevant_users[relevant_users.timestamp >=
                                    minimum_actions].index.values

    if mode == 'SUM':
        stats = {'total': {u: 0 for u in relevant_users},
                 'creation': {u: 0 for u in relevant_users},
                 'edition': {u: 0 for u in relevant_users},
                 'review': {u: 0 for u in relevant_users}}
        stats['total'].update(_extract_timestamps(all_timestamps,
                                                  desc='Calc total'))
        stats['creation'].update(_extract_timestamps(creation_timestamps,
                                                     desc='Calc creation'))
        stats['edition'].update(_extract_timestamps(edition_timestamps,
                                                    desc='Calc edition'))
        stats['review'].update(_extract_timestamps(review_timestamps,
                                                   desc='Calc review'))

        return pd.DataFrame([[user_list.loc[u, 'login'],
                              stats['total'][u],
                              stats['creation'][u],
                              stats['edition'][u],
                              stats['review'][u]] for u in relevant_users],
                            columns=['user', 'total', 'creation',
                                     'edition', 'review']
                            ).sort_values('total', ascending=False
                                          ).reset_index(drop=True
                                                        ).set_index('user')

    elif mode == 'ACTIONS':
        all_ts = all_timestamps.set_index('timestamp',
                                          drop=False).timestamp.groupby(
            pd.Grouper(freq='1d')).count().to_frame()
        all_ts.columns = ['all_users']
        all_ts = all_ts.T
        # Get actions per user per day
        for u in config.tqdm(all_timestamps.user.unique(),
                             desc='Calc. total', disable=config.pbar_hide,
                             leave=False):
            this_ts = all_timestamps[all_timestamps.user == u].set_index(
                'timestamp', drop=False).timestamp.groupby(
                    pd.Grouper(freq='1d')).count().to_frame()
            this_ts.columns = [user_list.loc[u, 'login']]

            all_ts = pd.concat([all_ts, this_ts.T])

        return all_ts.fillna(0)

    elif mode == 'OVER_TIME':
        # First count all minutes with minimum number of actions
        minutes_counting = (all_timestamps.set_index(
            'timestamp', drop=False).timestamp.groupby(
                pd.Grouper(freq=bin_width)).count().to_frame() > minimum_actions)
        # Then remove the minutes that have less than minimum actions
        minutes_counting = minutes_counting[minutes_counting.timestamp == True]
        # Now group by day
        all_ts = minutes_counting.groupby(pd.Grouper(freq='1d')).count()
        all_ts.columns = ['all_users']
        all_ts = all_ts.T
        # Get time invested per user per day
        for u in config.tqdm(all_timestamps.user.unique(),
                             desc='Calc. total', disable=config.pbar_hide,
                             leave=False):
            minutes_counting = (all_timestamps[all_timestamps.user == u].set_index(
                'timestamp', drop=False).timestamp.groupby(
                    pd.Grouper(freq=bin_width)).count().to_frame() > minimum_actions)
            minutes_counting = minutes_counting[minutes_counting.timestamp == True]
            this_ts = minutes_counting.groupby(pd.Grouper(freq='1d')).count()

            this_ts.columns = [user_list.loc[u, 'login']]

            all_ts = pd.concat([all_ts, this_ts.T])

        all_ts.fillna(0, inplace=True)

        return all_ts

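
# Usage sketch for the 'ACTIONS' mode (assumes `skids` and a global
# CatmaidInstance as in the docstring examples):
#
# >>> actions = pymaid.get_time_invested(skids, mode='ACTIONS')
# >>> # Number of days on which any tracing happened at all
# >>> (actions.loc['all_users'] > 0).sum()
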
def adjacency_from_connectors(source, target=None, remote_instance=None):
    """ Regenerates adjacency matrices from neurons' connectors.

    Notes
    -----
    This function creates an adjacency matrix from scratch using just the
    neurons' connectors. It is able to deal with non-unique skeleton IDs
    (most other functions are not). Use it e.g. when you split neurons
    into multiple fragments.

    Parameters
    ----------
    source,target :     skeleton IDs | CatmaidNeuron | CatmaidNeuronList
                        Neuron(s) for which to generate adjacency matrix.
                        If ``target==None``, will use ``target=source``.

    Returns
    -------
    pandas.DataFrame
        Matrix holding possible synaptic contacts. Sources are rows,
        targets are columns. Labels are skeleton IDs. Order is preserved.

        >>> df
                  target1  target2  target3  ...
        source1         5        1        0
        source2        10       20        5
        source3         4        3       15
        ...

    See Also
    --------
    :func:`~pymaid.adjacency_matrix`
                        If you are working with "intact" neurons. Much
                        faster!
    :func:`~pymaid.filter_connectivity`
                        Use this function if you have only a single
                        fragment per neuron (e.g. just the axon). Also way
                        faster.

    Examples
    --------
    >>> # Fetch some neurons
    >>> x = pymaid.get_neuron('annotation:PD2a1/b1')
    >>> # Split into axon / dendrites
    >>> x.reroot(x.soma)
    >>> split = pymaid.split_axon_dendrite(x)
    >>> # Regenerate all-by-all adjacency matrix
    >>> adj = pymaid.adjacency_from_connectors(split)
    >>> # Skeleton IDs are non-unique but column/row order = input order:
    >>> # in this example, the first occurrence is axon, the second dendrites
    >>> adj.head()

    """
    remote_instance = utils._eval_remote_instance(remote_instance)

    if not isinstance(source, (core.CatmaidNeuron, core.CatmaidNeuronList)):
        skids = utils.eval_skids(source, remote_instance)
        source = fetch.get_neuron(skids, remote_instance=remote_instance)

    if isinstance(target, type(None)):
        target = source
    elif not isinstance(target, (core.CatmaidNeuron, core.CatmaidNeuronList)):
        skids = utils.eval_skids(target, remote_instance)
        target = fetch.get_neuron(skids, remote_instance=remote_instance)

    if isinstance(source, core.CatmaidNeuron):
        source = core.CatmaidNeuronList(source)

    if isinstance(target, core.CatmaidNeuron):
        target = core.CatmaidNeuronList(target)

    # Generate empty adjacency matrix
    adj = np.zeros((len(source), len(target)))

    # Get connector details for all neurons
    all_cn = list(set(np.append(source.connectors.connector_id.values,
                                target.connectors.connector_id.values)))
    cn_details = fetch.get_connector_details(all_cn,
                                             remote_instance=remote_instance)

    # Now go over all source neurons and process connections
    for i, s in enumerate(config.tqdm(source, desc='Processing',
                                      disable=config.pbar_hide,
                                      leave=config.pbar_leave)):

        # Get all connectors presynaptic for this source
        this_cn = cn_details[(cn_details.presynaptic_to == int(s.skeleton_id)) &
                             (cn_details.connector_id.isin(s.connectors.connector_id))]

        # Go over all target neurons
        for k, t in enumerate(target):
            t_tn = set(t.nodes.treenode_id.values)
            t_post = t.postsynapses.connector_id.values

            # Extract number of connections from source to this target
            this_t = this_cn[this_cn.connector_id.isin(t_post)]

            # Now figure out how many links are between this connector and
            # the target
            n_links = sum([len(t_tn & set(r.postsynaptic_to_node))
                           for r in this_t.itertuples()])

            adj[i][k] = n_links

    return pd.DataFrame(adj,
                        index=source.skeleton_id,
                        columns=target.skeleton_id)

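
# Visualisation sketch building on the docstring example (matplotlib and
# seaborn are assumptions, not dependencies of this module):
#
# >>> import seaborn as sns
# >>> adj = pymaid.adjacency_from_connectors(split)
# >>> sns.heatmap(adj, annot=True)
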
def cn_table_from_connectors(x, remote_instance=None):
    """ Generate connectivity table from neurons' connectors.

    Notes
    -----
    This function creates the connectivity table from scratch using just
    the neurons' connectors. It is able to deal with non-unique skeleton
    IDs (most other functions won't). Use it e.g. when you split neurons
    into multiple fragments. *The order of the input CatmaidNeuronList is
    preserved!*

    Parameters
    ----------
    x :                 CatmaidNeuron | CatmaidNeuronList
                        Neuron(s) for which to generate connectivity table.

    Returns
    -------
    pandas.DataFrame
        DataFrame in which each row represents a neuron and the number of
        synapses with the query neurons:

        >>> df
           neuron_name  skeleton_id    relation  total  skid1  skid2  ...
        0        name1        skid1    upstream  n_syn  n_syn  ...
        1        name2        skid2  downstream  n_syn  n_syn  ...
        2        name3        skid3    upstream  n_syn  n_syn  ...
        ...

        ``relation`` can be ``'upstream'`` (incoming), ``'downstream'``
        (outgoing), ``'attachment'`` or ``'gapjunction'`` (gap junction).

    See Also
    --------
    :func:`~pymaid.get_partners`
                        If you are working with "intact" neurons. Much
                        faster!
    :func:`~pymaid.filter_connectivity`
                        Use this function if you have only a single
                        fragment per neuron (e.g. just the axon). Also way
                        faster.

    Examples
    --------
    >>> # Fetch some neurons
    >>> x = pymaid.get_neuron('annotation:PD2a1/b1')
    >>> # Split into axon / dendrites
    >>> x.reroot(x.soma)
    >>> split = pymaid.split_axon_dendrite(x)
    >>> # Regenerate cn_table
    >>> cn_table = pymaid.cn_table_from_connectors(split)
    >>> # Skeleton IDs are non-unique but column order = input order:
    >>> # in this example, the first occurrence is axon, the second dendrites
    >>> cn_table.head()

    """
    remote_instance = utils._eval_remote_instance(remote_instance)

    if not isinstance(x, (core.CatmaidNeuron, core.CatmaidNeuronList)):
        raise TypeError('Need CatmaidNeuron/List, got "{}"'.format(type(x)))

    if isinstance(x, core.CatmaidNeuron):
        x = core.CatmaidNeuronList(x)

    # Get connector details for all neurons
    all_cn = x.connectors.connector_id.values
    cn_details = fetch.get_connector_details(all_cn,
                                             remote_instance=remote_instance)

    # Remove connectors for which there are either no pre- or no
    # postsynaptic neurons
    cn_details = cn_details[cn_details.postsynaptic_to.apply(len) != 0]
    cn_details = cn_details[~cn_details.presynaptic_to.isnull()]

    # We need to map treenode ID to skeleton ID in cases where there are
    # more links (postsynaptic_to_node) than targets (postsynaptic_to)
    multi_links = cn_details[cn_details.postsynaptic_to.apply(len) <
                             cn_details.postsynaptic_to_node.apply(len)]
    if not multi_links.empty:
        tn_to_fetch = [tn for l in multi_links.postsynaptic_to_node
                       for tn in l]
        tn_to_skid = fetch.get_skid_from_treenode(
            tn_to_fetch, remote_instance=remote_instance)
    else:
        tn_to_skid = {}

    # Collect all pre and postsynaptic neurons
    all_pre = cn_details[~cn_details.presynaptic_to.isin(
        x.skeleton_id.astype(int))]
    all_post = cn_details[cn_details.presynaptic_to.isin(
        x.skeleton_id.astype(int))]

    all_partners = np.append(all_pre.presynaptic_to.values,
                             [n for l in all_post.postsynaptic_to.values
                              for n in l])

    us_dict = {}
    ds_dict = {}
    # Go over all neurons and process connectivity
    for i, n in enumerate(config.tqdm(x, desc='Processing',
                                      disable=config.pbar_hide,
                                      leave=config.pbar_leave)):

        # First prepare upstream partners:
        # Get all treenodes
        this_tn = set(n.nodes.treenode_id.values)
        # Prepare upstream partners
        this_us = all_pre[all_pre.connector_id.isin(
            n.connectors.connector_id.values)].copy()
        # Get the number of all links per connector
        this_us['n_links'] = [len(this_tn & set(r.postsynaptic_to_node))
                              for r in this_us.itertuples()]
        # Group by input and store as dict
        us_dict[n] = this_us.groupby('presynaptic_to').n_links.sum().to_dict()

        # Now prepare downstream partners:
        # Get all downstream connectors
        this_ds = all_post[all_post.presynaptic_to == int(n.skeleton_id)]
        # Prepare dict
        ds_dict[n] = {p: 0 for p in all_partners}
        # Easy cases first (single link to target per connector)
        is_single = this_ds.postsynaptic_to.apply(len) >= \
            this_ds.postsynaptic_to_node.apply(len)
        for r in this_ds[is_single].itertuples():
            for s in r.postsynaptic_to:
                ds_dict[n][s] += 1
        # Now hard cases - will have to look up skeleton ID via treenode ID
        for r in this_ds[~is_single].itertuples():
            for s in r.postsynaptic_to_node:
                ds_dict[n][tn_to_skid[s]] += 1

    # Now that we have all data, let's generate the table
    us_table = pd.DataFrame.from_dict(us_dict)
    ds_table = pd.DataFrame.from_dict(ds_dict)

    # Make sure we keep the order of the original neuronlist
    us_table = us_table[[n for n in x]]
    us_table.columns = [n.skeleton_id for n in us_table.columns]
    ds_table = ds_table[[n for n in x]]
    ds_table.columns = [n.skeleton_id for n in ds_table.columns]

    ds_table['relation'] = 'downstream'
    us_table['relation'] = 'upstream'

    # Generate table
    cn_table = pd.concat([us_table, ds_table], axis=0)

    # Replace NaN with 0
    cn_table = cn_table.fillna(0)

    # Make skeleton ID a column
    cn_table = cn_table.reset_index(drop=False)
    cn_table.columns = ['skeleton_id'] + list(cn_table.columns[1:])

    # Add names
    names = fetch.get_names(cn_table.skeleton_id.values,
                            remote_instance=remote_instance)
    cn_table['neuron_name'] = [names[str(s)]
                               for s in cn_table.skeleton_id.values]
    cn_table['total'] = cn_table[x.skeleton_id].sum(axis=1)

    # Drop rows with 0 synapses (e.g. if neuron is only up- but not
    # downstream)
    cn_table = cn_table[cn_table.total > 0]

    # Sort by number of synapses
    cn_table = cn_table.sort_values(['relation', 'total'], ascending=False)

    # Sort columns
    cn_table = cn_table[['neuron_name', 'skeleton_id', 'relation', 'total'] +
                        list(set(x.skeleton_id))]

    return cn_table

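
# Filtering sketch building on the docstring example (assumes `cn_table`
# from above; the threshold of 5 synapses is arbitrary):
#
# >>> # Upstream partners with at least 5 synapses onto the query neurons
# >>> strong_us = cn_table[(cn_table.relation == 'upstream') &
# ...                      (cn_table.total >= 5)]
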
def predict_connectivity(source, target, method='possible_contacts',
                         remote_instance=None, **kwargs):
    """ Calculates potential synapses from source onto target neurons.
    Based on a concept by Alex Bates.

    Parameters
    ----------
    source,target :     CatmaidNeuron | CatmaidNeuronList
                        Neuron(s) for which to compute potential
                        connectivity. This is unidirectional:
                        source -> target.
    method :            'possible_contacts'
                        Method to use for calculations. See Notes.
    **kwargs
                        1. For method = 'possible_contacts':
                           - ``dist`` to set distance between connectors
                             and treenodes manually.
                           - ``n_std`` to set number of standard deviations
                             of average distance. Default = 2.

    Notes
    -----
    Method ``possible_contacts``:

    1. Calculate mean distance ``d`` (connector->treenode) at which
       connections between neurons A and neurons B occur.
    2. For all presynapses of neurons A, check if they are within ``n_std``
       (default=2) standard deviations of ``d`` of a neurons B treenode.

    Returns
    -------
    pandas.DataFrame
        Matrix holding possible synaptic contacts. Sources are rows,
        targets are columns.

        >>> df
                  target1  target2  target3  ...
        source1         5        1        0
        source2        10       20        5
        source3         4        3       15
        ...

    """
    remote_instance = utils._eval_remote_instance(remote_instance)

    if not remote_instance:
        try:
            remote_instance = source._remote_instance
        except AttributeError:
            pass

    for _ in [source, target]:
        if not isinstance(_, (core.CatmaidNeuron, core.CatmaidNeuronList)):
            raise TypeError('Need CatmaidNeuron/List, got "{}"'.format(type(_)))

    if isinstance(source, core.CatmaidNeuron):
        source = core.CatmaidNeuronList(source)

    if isinstance(target, core.CatmaidNeuron):
        target = core.CatmaidNeuronList(target)

    allowed_methods = ['possible_contacts']
    if method not in allowed_methods:
        raise ValueError('Unknown method "{}". Allowed methods: "{}"'.format(
            method, ', '.join(allowed_methods)))

    matrix = pd.DataFrame(np.zeros((source.shape[0], target.shape[0])),
                          index=source.skeleton_id,
                          columns=target.skeleton_id)

    # First let's calculate at what distance synapses are being made
    cn_between = fetch.get_connectors_between(source, target,
                                              remote_instance=remote_instance)

    if kwargs.get('dist', None):
        distances = kwargs.get('dist')
    elif cn_between.shape[0] > 0:
        cn_locs = np.vstack(cn_between.connector_loc.values)
        tn_locs = np.vstack(cn_between.treenode2_loc.values)

        distances = np.sqrt(np.sum((cn_locs - tn_locs) ** 2, axis=1))

        logger.info('Average connector->treenode distances: '
                    '{:.2f} +/- {:.2f} nm'.format(distances.mean(),
                                                  distances.std()))
    else:
        logger.warning('No existing connectors to calculate average '
                       'connector->treenode distance found. Falling back '
                       'to default of 1um. Use <dist> argument to set '
                       'manually.')
        distances = 1000

    # Calculate distance threshold
    n_std = kwargs.get('n_std', 2)
    dist_threshold = np.mean(distances) + n_std * np.std(distances)

    with config.tqdm(total=len(target), desc='Predicting',
                     disable=config.pbar_hide,
                     leave=config.pbar_leave) as pbar:
        for t in target:
            # Create cKDTree for target
            tree = scipy.spatial.cKDTree(t.nodes[['x', 'y', 'z']].values,
                                         leafsize=10)
            for s in source:
                # Query against presynapses
                dist, ix = tree.query(s.presynapses[['x', 'y', 'z']].values,
                                      k=1,
                                      distance_upper_bound=dist_threshold,
                                      n_jobs=-1)

                # Calculate possible contacts
                possible_contacts = sum(dist != float('inf'))

                matrix.at[s.skeleton_id, t.skeleton_id] = possible_contacts

            pbar.update(1)

    return matrix.astype(int)

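
# Usage sketch (assumes two CatmaidNeuronLists `pns` and `lhns` - both
# placeholders; the kwarg name follows the Notes section above):
#
# >>> contacts = pymaid.predict_connectivity(pns, lhns, n_std=2)
# >>> # Number of source->target pairs with at least one potential contact
# >>> (contacts > 0).sum().sum()
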
def cable_overlap(a, b, dist=2, method='min'):
    """ Calculates the amount of cable of neuron A within given distance of
    neuron B. Uses dotproduct representation of a neuron!

    Parameters
    ----------
    a,b :       CatmaidNeuron | CatmaidNeuronList
                Neuron(s) for which to compute cable within distance.
    dist :      int, optional
                Maximum distance in microns [um].
    method :    'min' | 'max' | 'avg'
                Method by which to calculate the overlapping cable between
                two neurons. Assuming that neurons A and B have 300 and
                150 um of cable within given distances, respectively:

                1. 'min' returns 150
                2. 'max' returns 300
                3. 'avg' returns 225

    Returns
    -------
    pandas.DataFrame
        Matrix in which neurons A are rows, neurons B are columns. Cable
        within distance is given in microns.

        >>> df
                skidB1  skidB2  skidB3  ...
        skidA1       5       1       0
        skidA2      10      20       5
        skidA3       4       3      15
        ...

    """
    # Convert distance to nm
    dist *= 1000

    if not isinstance(a, (core.CatmaidNeuron, core.CatmaidNeuronList)) \
            or not isinstance(b, (core.CatmaidNeuron, core.CatmaidNeuronList)):
        raise TypeError('Need to pass CatmaidNeurons')

    if isinstance(a, core.CatmaidNeuron):
        a = core.CatmaidNeuronList(a)

    if isinstance(b, core.CatmaidNeuron):
        b = core.CatmaidNeuronList(b)

    allowed_methods = ['min', 'max', 'avg']
    if method not in allowed_methods:
        raise ValueError('Unknown method "{}". Allowed methods: "{}"'.format(
            method, ', '.join(allowed_methods)))

    matrix = pd.DataFrame(np.zeros((a.shape[0], b.shape[0])),
                          index=a.skeleton_id, columns=b.skeleton_id)

    with config.tqdm(total=len(a), desc='Calc. overlap',
                     disable=config.pbar_hide,
                     leave=config.pbar_leave) as pbar:
        # Keep track of KDtrees
        trees = {}
        for nA in a:
            # Get cKDTree for nA
            tA = trees.get(nA.skeleton_id, None)
            if not tA:
                trees[nA.skeleton_id] = tA = scipy.spatial.cKDTree(
                    np.vstack(nA.dps.point), leafsize=10)

            for nB in b:
                # Get cKDTree for nB
                tB = trees.get(nB.skeleton_id, None)
                if not tB:
                    trees[nB.skeleton_id] = tB = scipy.spatial.cKDTree(
                        np.vstack(nB.dps.point), leafsize=10)

                # Query nB -> nA
                distA, ixA = tA.query(np.vstack(nB.dps.point),
                                      k=1,
                                      distance_upper_bound=dist,
                                      n_jobs=-1)
                # Query nA -> nB
                distB, ixB = tB.query(np.vstack(nA.dps.point),
                                      k=1,
                                      distance_upper_bound=dist,
                                      n_jobs=-1)

                nA_in_dist = nA.dps.loc[ixA[distA != float('inf')]]
                nB_in_dist = nB.dps.loc[ixB[distB != float('inf')]]

                if nA_in_dist.empty:
                    overlap = 0
                elif method == 'avg':
                    overlap = (nA_in_dist.vec_length.sum() +
                               nB_in_dist.vec_length.sum()) / 2
                elif method == 'max':
                    overlap = max(nA_in_dist.vec_length.sum(),
                                  nB_in_dist.vec_length.sum())
                elif method == 'min':
                    overlap = min(nA_in_dist.vec_length.sum(),
                                  nB_in_dist.vec_length.sum())

                matrix.at[nA.skeleton_id, nB.skeleton_id] = overlap

            pbar.update(1)

    # Convert to um
    matrix /= 1000

    return matrix

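
# Usage sketch (assumes two CatmaidNeuronLists `a` and `b` whose
# dotproduct representations (.dps) can be computed):
#
# >>> ovl = pymaid.cable_overlap(a, b, dist=2, method='min')
# >>> # (row, column) labels of the pair with the most shared cable
# >>> ovl.stack().idxmax()
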
def take_snapshot(x, skeleton_data=True, cn_table=False, node_details=False,
                  adjacency_matrix=True, remote_instance=None,
                  cn_details=True, annotations=False):
    """ Take a snapshot of CATMAID data associated with a set of neurons.

    Important
    ---------
    If you pass a CatmaidNeuron/List that has been modified (e.g. pruned),
    other data (e.g. connectivity, etc.) will be subset as well if
    applicable. If your CatmaidNeuron/List is still naive, you might want
    to just pass the skeleton ID(s) to speed things up.

    Parameters
    ----------
    x :                 skeleton IDs | CatmaidNeuron/List
                        Neurons for which to retrieve data.
    skeleton_data :     bool, optional
                        Include 3D skeleton data.
    cn_table :          bool, optional
                        Include connectivity table. Covers all neurons
                        connected to input neurons.
    node_details :      bool, optional
                        Include treenode and connector details.
    adjacency_matrix :  bool, optional
                        Include adjacency matrix covering the input
                        neurons.
    cn_details :        bool, optional
                        Include connector details.
    annotations :       bool, optional
                        Include neuron annotations.
    remote_instance :   Catmaid Instance, optional
                        Either pass explicitly or define globally. Will
                        obviously not be added to the snapshot!

    Returns
    -------
    pandas Series

    See Also
    --------
    :func:`~pymaid.load_snapshot`
                        Use to load a snapshot file.

    """
    remote_instance = utils._eval_remote_instance(remote_instance)

    skids = utils.eval_skids(x, remote_instance)

    snapshot = pd.Series()

    # Add Coordinated Universal Time (UTC) date
    snapshot['utc_date'] = datetime.datetime.utcnow()

    # Add pymaid version
    snapshot['pymaid_version'] = init.__version__

    # Skeleton data is also needed for node and connector details below,
    # so fetch it if any of these is requested
    if skeleton_data or node_details or cn_details:
        if not isinstance(x, (core.CatmaidNeuronList, core.CatmaidNeuron)):
            skdata = fetch.get_neurons(skids, remote_instance=remote_instance)
        else:
            skdata = x

        if isinstance(skdata, core.CatmaidNeuron):
            skdata = core.CatmaidNeuronList(skdata)

    # Add skeleton data
    if skeleton_data:
        snapshot['skeleton_data'] = skdata.to_dataframe()

    # Add connectivity table
    if cn_table:
        if isinstance(x, (core.CatmaidNeuron, core.CatmaidNeuronList)):
            snapshot['cn_table'] = connectivity.cn_table_from_connectors(
                x, remote_instance=remote_instance)
        else:
            snapshot['cn_table'] = fetch.get_partners(
                x, remote_instance=remote_instance)

    # Add treenode and connector details
    if node_details:
        snapshot['treenode_details'] = fetch.get_node_details(
            skdata.nodes.treenode_id.values, remote_instance=remote_instance)
        snapshot['connector_details'] = fetch.get_node_details(
            skdata.connectors.connector_id.values,
            remote_instance=remote_instance)

    # Add adjacency matrix
    if adjacency_matrix:
        if isinstance(x, (core.CatmaidNeuron, core.CatmaidNeuronList)):
            snapshot['adjacency_matrix'] = connectivity.adjacency_from_connectors(
                x, remote_instance=remote_instance)
        else:
            snapshot['adjacency_matrix'] = connectivity.adjacency_matrix(
                x, remote_instance=remote_instance)

    # Add annotations
    if annotations:
        snapshot['annotations'] = fetch.get_annotations(
            skids, remote_instance=remote_instance)

    # Add connector details
    if cn_details:
        snapshot['cn_details'] = fetch.get_connector_details(
            skdata, remote_instance=remote_instance)

    return snapshot

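
# Usage sketch (assumes skeleton IDs `skids` and a global CatmaidInstance;
# serialisation via pickle is an assumption, not part of this function):
#
# >>> snap = pymaid.take_snapshot(skids, cn_table=True)
# >>> import pickle
# >>> with open('snapshot.pkl', 'wb') as f:
# ...     pickle.dump(snap, f)
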
def resample_neuron(x, resample_to, method='linear', inplace=False,
                    skip_errors=True):
    """ Resamples neuron(s) to a given resolution [nm]. Preserves root,
    leafs and branchpoints. Tags and connectors are mapped onto the
    closest new treenode. Columns "confidence" and "creator" of the
    treenode table are discarded.

    Important
    ---------
    This generates an entirely new set of treenode IDs! They will be
    unique within a neuron, but you may encounter duplicates across
    neurons. Also: be aware that high-resolution neurons will use A LOT
    of memory.

    Parameters
    ----------
    x :                 CatmaidNeuron | CatmaidNeuronList
                        Neuron(s) to resample.
    resample_to :       int
                        New resolution in NANOMETERS.
    method :            str, optional
                        See ``scipy.interpolate.interp1d`` for possible
                        options. By default, we're using linear
                        interpolation.
    inplace :           bool, optional
                        If True, will modify original neuron. If False, a
                        resampled copy is returned.
    skip_errors :       bool, optional
                        If True, will skip errors during interpolation and
                        only print a summary.

    Returns
    -------
    CatmaidNeuron/List
                        Resampled neuron(s). Only if ``inplace=False``.

    See Also
    --------
    :func:`pymaid.downsample_neuron`
                        This function reduces the number of nodes instead
                        of resampling to a certain resolution. Useful if
                        you are just after some simplification, e.g. for
                        speeding up your calculations, or if you want to
                        preserve treenode IDs.

    """
    if isinstance(x, core.CatmaidNeuronList):
        results = [resample_neuron(x[i], resample_to,
                                   method=method, inplace=inplace,
                                   skip_errors=skip_errors)
                   for i in config.trange(x.shape[0],
                                          desc='Resampl. neurons',
                                          disable=config.pbar_hide,
                                          leave=config.pbar_leave)]
        if not inplace:
            return core.CatmaidNeuronList(results)
        return
    elif not isinstance(x, core.CatmaidNeuron):
        logger.error('Unexpected datatype: %s' % str(type(x)))
        raise ValueError

    if not inplace:
        x = x.copy()

    # Collect some information for later
    nodes = x.nodes.set_index('treenode_id')
    locs = nodes[['x', 'y', 'z']]
    radii = nodes['radius'].to_dict()

    new_nodes = []
    max_tn_id = x.nodes.treenode_id.max() + 1

    errors = 0

    # Iterate over segments
    for i, seg in enumerate(config.tqdm(x.small_segments,
                                        desc='Proc. segments',
                                        disable=config.pbar_hide,
                                        leave=False)):
        # Get coordinates
        coords = locs.loc[seg].values.astype(float)
        # Get radii
        rad = [radii[tn] for tn in seg]

        # Vecs between subsequently measured points
        vecs = np.diff(coords.T)

        # path: cumulative distance along points (norm from first to ith
        # point)
        path = np.cumsum(np.linalg.norm(vecs, axis=0))
        path = np.insert(path, 0, 0)

        # If path is too short, just keep the first and last treenode
        if path[-1] < resample_to or (method == 'cubic' and len(seg) <= 3):
            new_nodes += [[seg[0], seg[-1], None,
                           coords[0][0], coords[0][1], coords[0][2],
                           radii[seg[0]], 5]]
            continue

        # Coords of interpolation
        n_nodes = int(path[-1] / resample_to)
        interp_coords = np.linspace(path[0], path[-1], n_nodes)

        try:
            sampleX = scipy.interpolate.interp1d(path, coords[:, 0],
                                                 kind=method)
            sampleY = scipy.interpolate.interp1d(path, coords[:, 1],
                                                 kind=method)
            sampleZ = scipy.interpolate.interp1d(path, coords[:, 2],
                                                 kind=method)
            sampleR = scipy.interpolate.interp1d(path, rad, kind=method)
        except ValueError as e:
            if skip_errors:
                errors += 1
                new_nodes += x.nodes.loc[x.nodes.treenode_id.isin(seg[:-1]),
                                         ['treenode_id', 'parent_id',
                                          'creator_id', 'x', 'y', 'z',
                                          'radius', 'confidence']].values.tolist()
                continue
            else:
                raise e

        # Sample each dim
        xnew = sampleX(interp_coords)
        ynew = sampleY(interp_coords)
        znew = sampleZ(interp_coords)
        rnew = sampleR(interp_coords).round(1)

        # Generate new coordinates
        new_coords = np.array([xnew, ynew, znew]).T.round()

        # Generate new ids (start and end node IDs of this segment are
        # kept)
        new_ids = seg[:1] + [max_tn_id + i for i in
                             range(len(new_coords) - 2)] + seg[-1:]

        # Keep track of new nodes
        new_nodes += [[tn, pn, None, co[0], co[1], co[2], r, 5]
                      for tn, pn, co, r in zip(new_ids[:-1],
                                               new_ids[1:],
                                               new_coords,
                                               rnew)]

        # Increase max index
        max_tn_id += len(new_ids)

    if errors:
        logger.warning('{} ({:.0%}) segments skipped due to errors'.format(
            errors, errors / i))

    # Add root node(s)
    root = x.root
    if not isinstance(root, (np.ndarray, list)):
        root = [x.root]
    root = x.nodes.loc[x.nodes.treenode_id.isin(root),
                       ['treenode_id', 'parent_id', 'creator_id',
                        'x', 'y', 'z', 'radius', 'confidence']]
    new_nodes += [list(r) for r in root.values]

    # Generate new nodes dataframe
    new_nodes = pd.DataFrame(data=new_nodes,
                             columns=['treenode_id', 'parent_id',
                                      'creator_id', 'x', 'y', 'z',
                                      'radius', 'confidence'],
                             dtype=object)

    # Convert columns to appropriate dtypes
    dtypes = {'treenode_id': int, 'parent_id': object, 'x': int, 'y': int,
              'z': int, 'radius': int, 'confidence': int}

    for k, v in dtypes.items():
        new_nodes[k] = new_nodes[k].astype(v)

    # Remove duplicate treenodes (branch points)
    new_nodes = new_nodes[~new_nodes.treenode_id.duplicated()]

    # Map connectors back:
    # 1. Get position of old synapse-bearing treenodes
    old_tn_position = x.nodes.set_index('treenode_id').loc[
        x.connectors.treenode_id, ['x', 'y', 'z']].values
    # 2. Get closest neighbours
    distances = scipy.spatial.distance.cdist(old_tn_position,
                                             new_nodes[['x', 'y', 'z']].values)
    min_ix = np.argmin(distances, axis=1)
    # 3. Map back onto neuron
    x.connectors['treenode_id'] = new_nodes.iloc[min_ix].treenode_id.values

    # Map tags back:
    if x.tags:
        # 1. Get position of old tag-bearing treenodes
        tag_tn = set([tn for l in x.tags.values() for tn in l])
        old_tn_position = x.nodes.set_index('treenode_id').loc[
            tag_tn, ['x', 'y', 'z']].values
        # 2. Get closest neighbours
        distances = scipy.spatial.distance.cdist(
            old_tn_position, new_nodes[['x', 'y', 'z']].values)
        min_ix = np.argmin(distances, axis=1)
        # 3. Create a dictionary mapping old to new treenode IDs
        new_tag_tn = {tn: new_nodes.iloc[min_ix[i]].treenode_id
                      for i, tn in enumerate(tag_tn)}
        # 4. Map tags back
        new_tags = {t: [new_tag_tn[tn] for tn in x.tags[t]] for t in x.tags}
        x.tags = new_tags

    # Set nodes
    x.nodes = new_nodes

    # Clear and regenerate temporary attributes
    x._clear_temp_attr()

    if not inplace:
        return x

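
# Usage sketch (assumes a CatmaidNeuron `n` in nanometer space):
#
# >>> rs = pymaid.resample_neuron(n, resample_to=1000, inplace=False)
# >>> # Node counts before/after
# >>> n.n_nodes, rs.n_nodes
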
def downsample_neuron(x, resampling_factor, preserve_cn_treenodes=True,
                      preserve_tag_treenodes=False, inplace=False):
    """ Downsamples neuron(s) by a given factor. Preserves root, leafs and
    branchpoints by default. Preservation of treenodes with synapses can
    be toggled.

    Parameters
    ----------
    x :                      CatmaidNeuron | CatmaidNeuronList
                             Neuron(s) to downsample.
    resampling_factor :      int
                             Factor by which to reduce the node count.
    preserve_cn_treenodes :  bool, optional
                             If True, treenodes that have connectors are
                             preserved.
    preserve_tag_treenodes : bool, optional
                             If True, treenodes with tags are preserved.
    inplace :                bool, optional
                             If True, will modify original neuron. If
                             False, a downsampled copy is returned.

    Returns
    -------
    CatmaidNeuron/List
                             Downsampled neuron. Only if ``inplace=False``.

    Notes
    -----
    Use ``resampling_factor=float('inf')`` and
    ``preserve_cn_treenodes=False`` to get a neuron consisting only of
    root, branch and end points.

    See Also
    --------
    :func:`pymaid.resample_neuron`
                             This function resamples a neuron to a given
                             resolution. This will not preserve treenode
                             IDs!

    """
    if isinstance(x, core.CatmaidNeuronList):
        results = [downsample_neuron(n, resampling_factor,
                                     preserve_cn_treenodes=preserve_cn_treenodes,
                                     preserve_tag_treenodes=preserve_tag_treenodes,
                                     inplace=inplace)
                   for n in x]
        if not inplace:
            return core.CatmaidNeuronList(results)
        return
    elif isinstance(x, core.CatmaidNeuron):
        if not inplace:
            x = x.copy()
    else:
        logger.error('Unexpected datatype: %s' % str(type(x)))
        raise ValueError

    # Sanity check: a factor <= 1 would not downsample
    if resampling_factor <= 1:
        raise ValueError('Resampling factor must be > 1.')

    if x.nodes.shape[0] <= 1:
        logger.warning('No nodes in neuron {}. Skipping...'.format(
            x.skeleton_id))
        if not inplace:
            return x
        else:
            return

    logger.debug('Preparing to downsample neuron...')

    list_of_parents = {n.treenode_id: n.parent_id
                       for n in x.nodes.itertuples()}
    list_of_parents[None] = None

    if 'type' not in x.nodes:
        graph_utils.classify_nodes(x)

    selection = x.nodes.type != 'slab'

    if preserve_cn_treenodes:
        selection = selection | x.nodes.treenode_id.isin(
            x.connectors.treenode_id)

    if preserve_tag_treenodes:
        with_tags = [t for l in x.tags.values() for t in l]
        selection = selection | x.nodes.treenode_id.isin(with_tags)

    fix_points = x.nodes[selection].treenode_id.values

    # Add soma node
    if not isinstance(x.soma, type(None)) and x.soma not in fix_points:
        fix_points = np.append(fix_points, x.soma)

    # Walk from all fix points to the root - jump N nodes on the way
    new_parents = {}

    logger.debug('Sampling neuron down by factor of {0}'.format(
        resampling_factor))
    for en in fix_points:
        this_node = en

        while True:
            stop = False
            new_p = list_of_parents[this_node]
            if new_p:
                i = 0
                while i < resampling_factor:
                    if new_p in fix_points or not new_p:
                        new_parents[this_node] = new_p
                        stop = True
                        break
                    new_p = list_of_parents[new_p]
                    i += 1

                if stop is True:
                    break
                else:
                    new_parents[this_node] = new_p
                    this_node = new_p
            else:
                new_parents[this_node] = None
                break

    new_nodes = x.nodes[x.nodes.treenode_id.isin(
        list(new_parents.keys()))].copy()
    new_nodes.loc[:, 'parent_id'] = [new_parents[tn]
                                     for tn in new_nodes.treenode_id]

    # We have to temporarily set the parent of the root node from None to
    # an integer
    root_ix = new_nodes[new_nodes.parent_id.isnull()].index
    new_nodes.loc[root_ix, 'parent_id'] = 0
    # First convert everything to int
    new_nodes.loc[:, 'parent_id'] = new_nodes.parent_id.values.astype(int)
    # Then back to object so that we can add a 'None'
    new_nodes.loc[:, 'parent_id'] = new_nodes.parent_id.values.astype(object)

    # Reassign parent_id None to root node
    new_nodes.loc[root_ix, 'parent_id'] = None

    logger.debug('Nodes before/after: {}/{}'.format(len(x.nodes),
                                                    len(new_nodes)))

    x.nodes = new_nodes

    # This is essential -> otherwise e.g. graph.neuron2graph will fail
    x.nodes.reset_index(inplace=True, drop=True)

    x._clear_temp_attr()

    if not inplace:
        return x

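
# Usage sketch (assumes a CatmaidNeuron `n`):
#
# >>> ds = pymaid.downsample_neuron(n, resampling_factor=4)
# >>> # Reduce to just root, branch and end points (see Notes):
# >>> skel = pymaid.downsample_neuron(n, float('inf'),
# ...                                 preserve_cn_treenodes=False)
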
def cut_neuron(x, cut_node, ret='both'):
    """ Split neuron at given point and return the resulting fragments.

    Note
    ----
    Split is performed between cut node and its parent node. However, cut
    node will still be present in both resulting neurons.

    Parameters
    ----------
    x :         CatmaidNeuron | CatmaidNeuronList
                Must be a single neuron.
    cut_node :  int | str | list
                Node ID(s) or tag(s) of the node(s) to cut. Multiple cuts
                are performed in the order of ``cut_node``. Fragments are
                ordered distal -> proximal.
    ret :       'proximal' | 'distal' | 'both', optional
                Define which parts of the neuron to return. Use this to
                speed up processing when making only a single cut!

    Returns
    -------
    distal -> proximal :    CatmaidNeuronList
                            Distal and proximal part of the neuron. Only
                            if ``ret='both'``. For multiple cuts, the
                            distal->proximal order of fragments is
                            maintained as far as possible but is not
                            guaranteed.
    distal :                CatmaidNeuronList
                            Distal part of the neuron. Only if
                            ``ret='distal'``.
    proximal :              CatmaidNeuronList
                            Proximal part of the neuron. Only if
                            ``ret='proximal'``.

    Examples
    --------
    First example: single cut at node tag

    >>> import pymaid
    >>> rm = pymaid.CatmaidInstance(url, http_user, http_pw, token)
    >>> n = pymaid.get_neuron(16)
    >>> # Cut neuron
    >>> nl = cut_neuron(n, 'SCHLEGEL_LH')
    >>> nl
    <class 'pymaid.core.CatmaidNeuronList'> of 2 neurons
                     neuron_name  skeleton_id  n_nodes  n_connectors  \\
    0  PN glomerulus VA6 017 DB           16     3603          1295
    1  PN glomerulus VA6 017 DB           16     9142           741
       n_branch_nodes  n_end_nodes  open_ends  cable_length review_status  soma
    0             303          323          3    960.118516            NA  False
    1             471          501        278   1905.986926            NA   True

    Second example: multiple cuts at low confidence edges

    >>> # Get neuron
    >>> n = pymaid.get_neuron(27295)
    >>> # Get IDs of low confidence treenodes
    >>> lc = n.nodes[n.nodes.confidence < 5].treenode_id.values
    >>> # Cut neuron
    >>> nl = pymaid.cut_neuron(n, lc)

    See Also
    --------
    :func:`pymaid.CatmaidNeuron.prune_distal_to`
    :func:`pymaid.CatmaidNeuron.prune_proximal_to`
            ``CatmaidNeuron/List`` shorthands to this function.
    :func:`pymaid.subset_neuron`
            Returns a neuron consisting of a subset of its treenodes.

    """
    if ret not in ['proximal', 'distal', 'both']:
        raise ValueError('ret must be either "proximal", "distal" or "both"!')

    if isinstance(x, core.CatmaidNeuron):
        pass
    elif isinstance(x, core.CatmaidNeuronList):
        if x.shape[0] == 1:
            x = x[0]
        else:
            logger.error('%i neurons provided. Please provide only a single '
                         'neuron!' % x.shape[0])
            raise Exception('%i neurons provided. Please provide only a '
                            'single neuron!' % x.shape[0])
    else:
        raise TypeError('Unable to process data of type "{0}"'.format(type(x)))

    # Turn cut node into iterable
    if not utils._is_iterable(cut_node):
        cut_node = [cut_node]

    # Process cut nodes (i.e. if tag)
    cn_ids = []
    for cn in cut_node:
        # If cut_node is a tag (rather than an ID), try finding that node
        if isinstance(cn, str):
            if cn not in x.tags:
                raise ValueError('#{}: Found no treenode with tag {} - '
                                 'please double check!'.format(x.skeleton_id,
                                                               cn))
            cn_ids += x.tags[cn]
        elif cn not in x.nodes.treenode_id.values:
            raise ValueError('No treenode with ID "{}" found.'.format(cn))
        else:
            cn_ids.append(cn)

    # Remove duplicates while retaining order - set() would mess that up
    seen = set()
    cn_ids = [cn for cn in cn_ids if not (cn in seen or seen.add(cn))]

    # Warn if not all fragments will be returned
    if len(cn_ids) > 1 and ret != 'both':
        logger.warning('Multiple cuts should use `ret = "both"`.')

    # Go over all cut_nodes -> order matters!
    res = [x]
    for cn in cn_ids:
        # First, find out in which neuron the cut node is
        to_cut = [n for n in res if cn in n.nodes.treenode_id.values][0]
        to_cut_ix = res.index(to_cut)

        # Remove this neuron from results (will be cut into two)
        res.remove(to_cut)

        # Cut neuron
        if x.igraph and config.use_igraph:
            cut = _cut_igraph(to_cut, cn, ret)
        else:
            cut = _cut_networkx(to_cut, cn, ret)

        # If ret != 'both', we will get only a single neuron
        if not utils._is_iterable(cut):
            cut = [cut]

        # Add results back at the same index, proximal first
        for c in cut[::-1]:
            res.insert(to_cut_ix, c)

    return core.CatmaidNeuronList(res)

def split_into_fragments(x, n=2, min_size=None, reroot_to_soma=False):
    """ Splits neuron into fragments.

    Notes
    -----
    Cuts are based on longest neurites: the first cut is made where the
    second largest neurite merges onto the largest neurite, the second cut
    is made where the third largest neurite merges onto either of the
    first two fragments, and so on.

    Parameters
    ----------
    x :                 CatmaidNeuron | CatmaidNeuronList
                        May contain only a single neuron.
    n :                 int, optional
                        Number of fragments to split into. Must be >1.
    min_size :          int, optional
                        Minimum size of fragment in um to be cut off. If
                        too small, will stop cutting. This takes only the
                        longest path in each fragment into account!
    reroot_to_soma :    bool, optional
                        If True, neuron will be rerooted to soma.

    Returns
    -------
    CatmaidNeuronList

    Examples
    --------
    >>> x = pymaid.get_neuron('16')
    >>> # Cut into two fragments
    >>> cut1 = pymaid.split_into_fragments(x, n=2)
    >>> # Cut into fragments of >10 um size
    >>> cut2 = pymaid.split_into_fragments(x, n=float('inf'), min_size=10)

    """
    if isinstance(x, core.CatmaidNeuron):
        pass
    elif isinstance(x, core.CatmaidNeuronList):
        if x.shape[0] == 1:
            x = x[0]
        else:
            logger.error('%i neurons provided. Please provide only a single '
                         'neuron!' % x.shape[0])
            raise Exception('%i neurons provided. Please provide only a '
                            'single neuron!' % x.shape[0])
    else:
        raise TypeError('Unable to process data of type "{0}"'.format(type(x)))

    if n < 2:
        raise ValueError('Number of fragments must be at least 2.')

    if reroot_to_soma and x.soma:
        x.reroot(x.soma)

    # Collect treenodes of the n longest neurites
    tn_to_preserve = []
    fragments = []
    i = 0
    while i < n:
        if tn_to_preserve:
            # Generate fresh graph
            g = graph.neuron2nx(x)

            # Remove nodes that we have already preserved
            g.remove_nodes_from(tn_to_preserve)
        else:
            g = x.graph

        # Get path
        longest_path = nx.dag_longest_path(g)

        # Check if fragment is still long enough
        if min_size:
            this_length = sum([v / 1000 for k, v in nx.get_edge_attributes(
                x.graph, 'weight').items() if k[1] in longest_path])
            if this_length <= min_size:
                break

        tn_to_preserve += longest_path
        fragments.append(longest_path)

        i += 1

    # Next, make some virtual cuts and get the complement of treenodes for
    # each fragment
    graphs = [x.graph.copy()]
    for fr in fragments[1:]:
        this_g = nx.bfs_tree(x.graph, fr[-1], reverse=True)

        graphs.append(this_g)

    # Next, we need to remove treenodes that are in subsequent graphs from
    # those graphs
    for i, g in enumerate(graphs):
        for g2 in graphs[i + 1:]:
            g.remove_nodes_from(g2.nodes)

    # Now make neurons
    nl = core.CatmaidNeuronList([subset_neuron(x, g, clear_temp=True)
                                 for g in graphs])

    # Rename neurons
    for i, n in enumerate(nl):
        n.neuron_name += '_{}'.format(i)

    return nl

def from_swc(f, neuron_name=None, neuron_id=None, pre_label=None,
             post_label=None):
    """ Generate neuron object from SWC file. This import follows the
    format specified here: http://research.mssm.edu/cnic/swc.html

    Important
    ---------
    This import assumes coordinates in SWC are in microns and will convert
    them to nanometers! Soma is inferred from radius (>0), not the label.

    Parameters
    ----------
    f :                 str
                        SWC filename or folder. If folder, will import all
                        ``.swc`` files.
    neuron_name :       str, optional
                        Name to use for the neuron. If not provided, will
                        use filename.
    neuron_id :         int, optional
                        Unique identifier (essentially skeleton ID). If
                        not provided, will generate one from scratch.
    pre/post_label :    bool | int, optional
                        If not ``None``, will try to extract
                        pre-/postsynapses from label column.

    Returns
    -------
    CatmaidNeuron/List

    See Also
    --------
    :func:`pymaid.to_swc`
                        Export neurons as SWC files.

    """
    if os.path.isdir(f):
        swc = [os.path.join(f, x) for x in os.listdir(f) if
               os.path.isfile(os.path.join(f, x)) and x.endswith('.swc')]
        return core.CatmaidNeuronList([from_swc(x,
                                                neuron_name=neuron_name,
                                                neuron_id=neuron_id,
                                                pre_label=pre_label,
                                                post_label=post_label)
                                       for x in config.tqdm(swc,
                                                            desc='Importing',
                                                            disable=config.pbar_hide,
                                                            leave=config.pbar_leave)])

    if not neuron_id:
        neuron_id = uuid.uuid4().int

    if not neuron_name:
        neuron_name = os.path.basename(f)

    data = []
    with open(f) as file:
        reader = csv.reader(file, delimiter=' ')
        for row in reader:
            # Skip empty rows
            if not row:
                continue
            # Skip comments
            if not row[0].startswith('#'):
                data.append(row)

    # Remove empty entries and generate nodes DataFrame
    nodes = pd.DataFrame([[float(e) for e in row if e != ''] for row in data],
                         columns=['treenode_id', 'label', 'x', 'y', 'z',
                                  'radius', 'parent_id'],
                         dtype=object)

    # Root node will have parent=-1 -> set this to None
    nodes.loc[nodes.parent_id < 0, 'parent_id'] = None

    # Bring coordinates and radius from um into nm space
    nodes[['x', 'y', 'z', 'radius']] *= 1000

    connectors = pd.DataFrame([], columns=['treenode_id', 'connector_id',
                                           'relation', 'x', 'y', 'z'],
                              dtype=object)

    if pre_label:
        pre = nodes[nodes.label == pre_label][['treenode_id', 'x', 'y', 'z']]
        pre['connector_id'] = None
        pre['relation'] = 0
        connectors = pd.concat([connectors, pre], axis=0)

    if post_label:
        post = nodes[nodes.label == post_label][['treenode_id', 'x', 'y', 'z']]
        post['connector_id'] = None
        post['relation'] = 1
        connectors = pd.concat([connectors, post], axis=0)

    df = pd.DataFrame([[neuron_name, str(neuron_id), nodes, connectors, {}]],
                      columns=['neuron_name', 'skeleton_id', 'nodes',
                               'connectors', 'tags'],
                      dtype=object)

    # Placeholder for graph representations of neurons
    df['igraph'] = None
    df['graph'] = None

    # Convert data to respective dtypes
    dtypes = {'treenode_id': int, 'parent_id': object, 'creator_id': int,
              'relation': int, 'connector_id': object, 'x': int, 'y': int,
              'z': int, 'radius': int, 'confidence': int}

    for k, v in dtypes.items():
        for t in ['nodes', 'connectors']:
            for i in range(df.shape[0]):
                if k in df.loc[i, t]:
                    df.loc[i, t][k] = df.loc[i, t][k].astype(v)

    return core.CatmaidNeuron(df)

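
# Usage sketch (assumes a folder './swc' with SWC files whose label column
# encodes synapses as 7 (pre) and 8 (post) - path and label values are
# placeholders):
#
# >>> nl = pymaid.from_swc('./swc', pre_label=7, post_label=8)
# >>> nl
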
def _parse_objects(x, remote_instance=None):
    """ Helper function to extract objects for plotting.

    Returns
    -------
    skids :     list
    skdata :    pymaid.CatmaidNeuronList
    dotprops :  pd.DataFrame
    volumes :   list
    points :    list of arrays
    visuals :   list of vispy visuals

    """
    if not isinstance(x, list):
        x = [x]

    # Check for skeleton IDs
    skids = []
    for ob in x:
        if isinstance(ob, (str, int)):
            try:
                skids.append(int(ob))
            except BaseException:
                pass

    # Collect neuron objects and collate to single Neuronlist
    neuron_obj = [ob for ob in x if isinstance(ob,
                                               (core.CatmaidNeuron,
                                                core.CatmaidNeuronList))]
    skdata = core.CatmaidNeuronList(neuron_obj, make_copy=False)

    # Collect visuals
    visuals = [ob for ob in x if 'vispy' in str(type(ob))]

    # Collect dotprops
    dotprops = [ob for ob in x if isinstance(ob, core.Dotprops)]

    if len(dotprops) == 1:
        dotprops = dotprops[0]
    elif len(dotprops) == 0:
        dotprops = core.Dotprops()
    elif len(dotprops) > 1:
        dotprops = pd.concat(dotprops)

    # Collect and parse volumes
    volumes = [ob for ob in x if isinstance(ob, (core.Volume, str))]

    # Collect dataframes with X/Y/Z coordinates
    # Note: dotprops and volumes are instances of pd.DataFrame
    dataframes = [ob for ob in x if isinstance(ob, pd.DataFrame) and
                  not isinstance(ob, (core.Dotprops, core.Volume))]
    if [d for d in dataframes if False in [c in d.columns
                                           for c in ['x', 'y', 'z']]]:
        logger.warning('DataFrames must have x, y and z columns.')
    # Filter to and extract x/y/z coordinates
    dataframes = [d for d in dataframes if False not in
                  [c in d.columns for c in ['x', 'y', 'z']]]
    dataframes = [d[['x', 'y', 'z']].values for d in dataframes]

    # Collect arrays
    arrays = [ob.copy() for ob in x if isinstance(ob, np.ndarray)]
    # Remove arrays with wrong dimensions
    if [ob for ob in arrays if ob.shape[1] != 3]:
        logger.warning('Point objects need to be of shape (n,3).')
    arrays = [ob for ob in arrays if ob.shape[1] == 3]

    points = dataframes + arrays

    return skids, skdata, dotprops, volumes, points, visuals

def neuron2json(x, **kwargs):
    """ Generate JSON formatted ``str`` representation of
    CatmaidNeuron/List.

    Notes
    -----
    Nodes and connectors are serialised using pandas' ``to_json()``. Most
    other items in the neuron's __dict__ are serialised using
    ``json.dumps()``. Properties not serialised: `._remote_instance`,
    `.graph`, `.igraph`.

    Important
    ---------
    For safety, the :class:`~pymaid.CatmaidInstance` is not serialised as
    this would expose your credentials. Parameters attached to a
    neuronlist are currently not preserved.

    Parameters
    ----------
    x :                 CatmaidNeuron | CatmaidNeuronList
    **kwargs
                        Parameters passed to ``json.dumps()`` and
                        ``pandas.DataFrame.to_json()``.

    Returns
    -------
    str

    See Also
    --------
    :func:`~pymaid.json2neuron`
                        Read json back into pymaid neurons.

    """
    if not isinstance(x, (core.CatmaidNeuron, core.CatmaidNeuronList)):
        raise TypeError('Unable to convert data of type "{0}"'.format(type(x)))

    if isinstance(x, core.CatmaidNeuron):
        x = core.CatmaidNeuronList([x])

    data = []
    for n in x:
        this_data = {'skeleton_id': n.skeleton_id}

        if 'nodes' in n.__dict__:
            this_data['nodes'] = n.nodes.to_json(**kwargs)

        if 'connectors' in n.__dict__:
            this_data['connectors'] = n.connectors.to_json(**kwargs)

        for k in n.__dict__:
            if k in ['nodes', 'connectors', 'graph', 'igraph',
                     '_remote_instance', 'segments', 'small_segments',
                     'nodes_geodesic_distance_matrix', 'dps', 'simple']:
                continue
            try:
                this_data[k] = n.__dict__[k]
            except BaseException:
                logger.error('Lost attribute "{0}"'.format(k))

        data.append(this_data)

    return json.dumps(data, **kwargs)