def wrapper(*args, **kwargs):
    # Get remote instance either from kwargs or global
    rm = utils._eval_remote_instance(kwargs.get('remote_instance', None))
    try:
        # Keep track of what point in the query log we are
        n_queries = len(rm._cache.request_log)
        old_cache = set(rm._cache.keys())
        # Execute function the first time (make sure no new data is added
        # if exception is raised)
        res = undo_on_error(function)(*args, **kwargs)
    except BaseException:
        # If caching is on, try the function without caching
        if rm.caching:
            # If function failed without even using cached data, raise
            if not set(rm._cache.request_log[n_queries:]) & old_cache:
                raise
            # Remove requested data before retrying
            for q in rm._cache.request_log[n_queries:]:
                if q in rm._cache:
                    rm._cache.pop(q)
            logger.info('Failed using cached data. '
                        'Clearing cache and retrying...')
            # Retry function
            res = undo_on_error(function)(*args, **kwargs)
        # If caching is off, raise right away
        else:
            raise
    # Return result
    return res
def wrapper(*args, **kwargs):
    # Get remote instance either from kwargs or global
    rm = utils._eval_remote_instance(kwargs.get('remote_instance', None))
    try:
        # Execute function
        res = function(*args, **kwargs)
    except BaseException:
        # If caching is on, try the function without caching
        if rm.caching:
            logger.info('Failed using cached data. '
                        'Clearing relevant entries and retrying...')
            # Clear the cached URLs this function depends on
            # (``to_clear`` comes from the enclosing decorator's scope)
            for f in to_clear:
                url = getattr(rm, f)()
                rm._cache.clear_cached_url(url)
            # Retry function
            res = function(*args, **kwargs)
        # If caching is off, raise right away
        else:
            raise
    # Return result
    return res
def wrapper(*args, **kwargs):
    # Get remote instance either from kwargs or global
    rm = utils._eval_remote_instance(kwargs.get('remote_instance', None))
    # Keep track of old caching settings
    old_value = rm.caching
    # Set caching to False
    rm.caching = False
    try:
        # Execute function
        res = function(*args, **kwargs)
    finally:
        # Restore caching to old value even if the function raised
        rm.caching = old_value
    # Return result
    return res
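
# A minimal, self-contained sketch of the decorator pattern used by the
# wrappers above: temporarily flip a flag on a stateful object and
# guarantee it is restored via try/finally. ``FakeInstance`` and
# ``no_caching`` are made-up names for illustration only.
def _demo_toggle_decorator():
    import functools

    class FakeInstance:
        caching = True

    rm = FakeInstance()

    def no_caching(function):
        @functools.wraps(function)
        def wrapper(*args, **kwargs):
            old_value = rm.caching
            rm.caching = False
            try:
                return function(*args, **kwargs)
            finally:
                # Restored even if ``function`` raises
                rm.caching = old_value
        return wrapper

    @no_caching
    def fetch_something():
        return rm.caching  # False while inside the wrapper

    assert fetch_something() is False
    assert rm.caching is True  # restored afterwards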
def _in_volume_convex(points, volume, remote_instance=None,
                      approximate=False, ignore_axis=[]):
    """Use scipy to test if points are within a given CATMAID volume.

    The idea is to test if adding a point to the point cloud changes the
    convex hull: if yes, that point is outside the convex hull.
    """
    remote_instance = utils._eval_remote_instance(remote_instance)

    if isinstance(volume, str):
        volume = fetch.get_volume(volume, remote_instance)

    verts = volume.vertices

    # Normalise points to a (N, 3) numpy array
    if isinstance(points, list):
        points = np.array(points)
    elif isinstance(points, pd.DataFrame):
        points = points[['x', 'y', 'z']].values

    if not approximate:
        intact_hull = ConvexHull(verts)
        intact_verts = list(intact_hull.vertices)

        # A point is inside if adding it leaves the hull's vertices unchanged
        return [list(ConvexHull(np.append(verts, list([p]),
                                          axis=0)).vertices) == intact_verts
                for p in points]
    else:
        # Approximate check against the volume's bounding box
        bbox = [(min([v[0] for v in verts]), max([v[0] for v in verts])),
                (min([v[1] for v in verts]), max([v[1] for v in verts])),
                (min([v[2] for v in verts]), max([v[2] for v in verts]))]

        for a in ignore_axis:
            bbox[a] = (float('-inf'), float('inf'))

        return [all([bbox[0][0] < p[0] < bbox[0][1],
                     bbox[1][0] < p[1] < bbox[1][1],
                     bbox[2][0] < p[2] < bbox[2][1]])
                for p in points]
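
# A minimal sketch of the convex-hull membership idea used above, assuming
# only numpy and scipy. The cube vertices and test points are made up; the
# test relies on qhull reporting the same hull vertices when an interior
# point is added, which is exactly the comparison the function above makes.
def _demo_convex_hull_test():
    import numpy as np
    from scipy.spatial import ConvexHull

    # Unit cube corners stand in for the volume's mesh vertices
    verts = np.array([[x, y, z]
                      for x in (0, 1) for y in (0, 1) for z in (0, 1)],
                     dtype=float)
    hull = ConvexHull(verts)
    base_verts = list(hull.vertices)

    points = np.array([[0.5, 0.5, 0.5],    # inside
                       [2.0, 2.0, 2.0]])   # outside
    # A point is inside iff adding it leaves the hull's vertex set unchanged
    inside = [list(ConvexHull(np.append(verts, [p],
                                        axis=0)).vertices) == base_verts
              for p in points]
    assert inside == [True, False]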
def wrapper(*args, **kwargs):
    # Get remote instance either from kwargs or global
    rm = utils._eval_remote_instance(kwargs.get('remote_instance', None))
    # Keep existing cache entries
    # DO NOT remove the list() - we need a snapshot, not a view
    old = list(rm._cache.keys())
    try:
        # Execute function
        res = function(*args, **kwargs)
    except BaseException:
        # If an error was raised, remove new entries from the cache
        new_entries = [k for k in rm._cache.keys() if k not in old]
        for k in new_entries:
            _ = rm._cache.pop(k)
        raise
    return res
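
# A minimal sketch of the "undo on error" idea above, using a plain dict
# as the cache. All names here are illustrative.
def _demo_undo_on_error():
    cache = {'a': 1}

    def failing_fetch():
        cache['b'] = 2  # new entry added before the failure
        raise RuntimeError('simulated failure')

    # Snapshot the keys (list() makes a copy, not a view)
    old = list(cache.keys())
    try:
        failing_fetch()
    except RuntimeError:
        # Roll back anything added since the snapshot
        for k in [k for k in cache.keys() if k not in old]:
            cache.pop(k)

    assert cache == {'a': 1}  # the failed call left no trace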
def __init__(self, bbox, zoom_level=0, coords='NM', mem_lim=4000,
             remote_instance=None):
    """Initialise class."""
    if coords not in ['PIXEL', 'NM']:
        raise ValueError('Coordinates need to be "PIXEL" or "NM".')

    # Convert single bbox to multiple bounding boxes
    if isinstance(bbox, np.ndarray):
        if bbox.ndim == 1:
            self.bboxes = [bbox]
        elif bbox.ndim == 2:
            self.bboxes = bbox
        else:
            raise ValueError('Unable to interpret bounding box with {0} '
                             'dimensions'.format(bbox.ndim))
    elif isinstance(bbox, list):
        if any(isinstance(el, (list, np.ndarray)) for el in bbox):
            self.bboxes = bbox
        else:
            self.bboxes = [bbox]
    else:
        raise TypeError('Bounding box must be list or array, '
                        'not {0}'.format(type(bbox)))

    self.remote_instance = utils._eval_remote_instance(remote_instance)
    self.zoom_level = zoom_level
    self.coords = coords
    self.mem_lim = mem_lim

    self.get_stack_info()
    self.bboxes2imgcoords()

    memory_est = self.estimate_memory()
    logger.info('Estimated memory usage: {0:.2f} Mb'.format(memory_est))
def wrapper(*args, **kwargs):
    # Get remote instance either from kwargs or global
    rm = utils._eval_remote_instance(kwargs.get('remote_instance', None))
    try:
        # Execute function
        res = function(*args, **kwargs)
    except BaseException:
        # If caching is on, try the function without caching
        if rm.caching:
            logger.info('Failed using cached data. '
                        'Retrying without caching...')
            rm.caching = False
            try:
                res = function(*args, **kwargs)
            finally:
                # Make sure to re-enable caching
                rm.caching = True
        # If caching is off, raise right away
        else:
            raise
    # Return result
    return res
def neuron2py(neuron, remote_instance=None):
    """Convert an rcatmaid ``neuron`` or ``neuronlist`` object to a PyMaid
    :class:`~pymaid.CatmaidNeuron`/:class:`~pymaid.CatmaidNeuronList`.

    Notes
    -----
    Node creator and confidence are not included in R's neuron/neuronlist
    and will be imported as ``None``.

    Parameters
    ----------
    neuron :            R neuron | R neuronlist
                        Neuron to convert to Python.
    remote_instance :   CATMAID instance, optional
                        Provide if you want neuron names to be updated from
                        the server.

    Returns
    -------
    pymaid CatmaidNeuronList
    """
    if 'rpy2' in str(type(neuron)):
        if cl(neuron)[0] == 'neuronlist':
            neuron_list = pd.DataFrame(
                data=[[data2py(e) for e in n] for n in neuron],
                columns=list(neuron[0].names))
            # neuron_list.columns = data.names  # ['NumPoints',
            # 'StartPoint', 'BranchPoints', 'EndPoints', 'nTrees', 'NumSeqs',
            # 'SegList', 'd', 'skid', 'connectors', 'tags', 'url', 'headers']
            if 'df' in neuron.slots:
                neuron_list['name'] = neuron.slots['df'][2]
            else:
                neuron_list['name'] = ['NA'] * neuron_list.shape[0]
        elif cl(neuron)[0] in ('catmaidneuron', 'neuron'):
            neuron_list = pd.DataFrame(data=[[e for e in neuron]],
                                       columns=neuron.names)
            neuron_list = neuron_list.applymap(data2py)
            neuron_list['name'] = ['NA']
        neuron = neuron_list

    remote_instance = utils._eval_remote_instance(remote_instance,
                                                  raise_error=False)

    # Nat functions may return neuron objects that have ONLY nodes - no
    # connectors, skeleton_id, name or tags!
    if 'skid' in neuron and remote_instance:
        neuron_names = fetch.get_names([n[0] for n in neuron.skid.tolist()],
                                       remote_instance)
    elif 'skid' in neuron and not remote_instance:
        neuron_names = None
        logger.info('Please provide a remote instance if you want to add '
                    'neuron names.')
    else:
        logger.warning('Neuron has only nodes (no name, skid, connectors '
                       'or tags).')

    data = []
    for i in range(neuron.shape[0]):
        # Note that radius is divided by 2 -> this is because in rcatmaid
        # the original radius is doubled for some reason
        nodes = pd.DataFrame([[no.PointNo, no.Parent, None, no.X, no.Y,
                               no.Z, no.W / 2, None]
                              for no in neuron.loc[i, 'd'].itertuples()],
                             dtype=object)
        nodes.columns = ['treenode_id', 'parent_id', 'creator_id',
                         'x', 'y', 'z', 'radius', 'confidence']
        nodes.loc[nodes.parent_id == -1, 'parent_id'] = None

        if 'connectors' in neuron:
            connectors = pd.DataFrame([[cn.treenode_id, cn.connector_id,
                                        cn.prepost, cn.x, cn.y, cn.z]
                                       for cn in neuron.loc[i, 'connectors'].itertuples()],
                                      dtype=object)
            connectors.columns = ['treenode_id', 'connector_id', 'relation',
                                  'x', 'y', 'z']
        else:
            connectors = pd.DataFrame(columns=['treenode_id', 'connector_id',
                                               'relation', 'x', 'y', 'z'])

        if 'skid' in neuron:
            skid = neuron.loc[i, 'skid'][0]
        else:
            skid = 'NA'

        data.append([skid, nodes, connectors])

    df = pd.DataFrame(data=data,
                      columns=['skeleton_id', 'nodes', 'connectors'],
                      dtype=object)
    df['igraph'] = None

    if 'tags' in neuron:
        df['tags'] = neuron.tags.tolist()
    else:
        df['tags'] = [{} for n in df.skeleton_id.tolist()]

    if 'skid' in neuron and neuron_names is not None:
        df['neuron_name'] = [neuron_names[str(n)]
                             for n in df.skeleton_id.tolist()]
    else:
        df['neuron_name'] = ['NA' for n in df.skeleton_id.tolist()]

    return core.CatmaidNeuronList(df, remote_instance=remote_instance)
def get_team_contributions(teams, neurons=None, remote_instance=None):
    """Get contributions by teams: nodes, reviews, connectors, time invested.

    Notes
    -----
     1. Time calculation uses defaults from :func:`pymaid.get_time_invested`.
     2. ``total_reviews`` > ``total_nodes`` is possible if nodes have been
        reviewed multiple times by different users. Similarly,
        ``total_reviews`` = ``total_nodes`` does not imply that the neuron
        is fully reviewed!

    Parameters
    ----------
    teams               dict
                        Teams to group contributions for. Users must be
                        logins. Format can be either:

                          1. Simple user assignments. For example::

                              {'teamA': ['user1', 'user2'],
                               'teamB': ['user3'], ...}

                          2. Users with start and end dates. Start and end
                             date must be either ``datetime.date`` or a
                             single ``pandas.date_range`` object. For
                             example::

                               {'teamA': {'user1': (datetime.date(2017, 1, 1),
                                                    datetime.date(2018, 1, 1)),
                                          'user2': (datetime.date(2016, 6, 1),
                                                    datetime.date(2017, 1, 1))},
                                'teamB': {'user3': pandas.date_range('2017-1-1',
                                                                     '2018-1-1')}}

                        Mixing both styles is permissible. For the second
                        style, use e.g. ``'user1': None`` for no date
                        restrictions on that user.
    neurons             skeleton ID(s) | CatmaidNeuron/List, optional
                        Restrict check to given set of neurons. If
                        CatmaidNeuron/List, will use these neurons' nodes/
                        connectors. Use to subset contributions e.g. to a
                        given neuropil by pruning neurons before passing to
                        this function.
    remote_instance :   CatmaidInstance, optional
                        Either pass explicitly or define globally.

    Returns
    -------
    pandas.DataFrame
        DataFrame in which each row represents a neuron. Example for two
        teams, ``teamA`` and ``teamB``:

        >>> df
           skeleton_id  total_nodes  teamA_nodes  teamB_nodes ...
        0
        1
           total_reviews  teamA_reviews  teamB_reviews ...
        0
        1
           total_connectors  teamA_connectors  teamB_connectors ...
        0
        1
           total_time  teamA_time  teamB_time
        0
        1

    Examples
    --------
    >>> from datetime import date
    >>> import pandas as pd
    >>> teams = {'teamA': ['user1', 'user2'],
    ...          'teamB': {'user3': None,
    ...                    'user4': (date(2017, 1, 1), date(2018, 1, 1))},
    ...          'teamC': {'user5': pd.date_range('2015-1-1', '2018-1-1')}}
    >>> stats = pymaid.get_team_contributions(teams)

    See Also
    --------
    :func:`~pymaid.get_contributor_statistics`
            Gives you more basic info on neurons of interest such as total
            reconstruction/review time.
    :func:`~pymaid.get_time_invested`
            Time invested by individual users. Gives you more control over
            how time is calculated.
    """
    remote_instance = utils._eval_remote_instance(remote_instance)

    # Prepare teams
    if not isinstance(teams, dict):
        raise TypeError('Expected teams of type dict, got '
                        '{}'.format(type(teams)))

    beginning_of_time = datetime.date(1900, 1, 1)
    today = datetime.date.today()
    all_time = pd.date_range(beginning_of_time, today)

    for t in teams:
        if isinstance(teams[t], list):
            teams[t] = {u: all_time for u in teams[t]}
        elif isinstance(teams[t], dict):
            for u in teams[t]:
                if isinstance(teams[t][u], type(None)):
                    teams[t][u] = all_time
                elif isinstance(teams[t][u], (tuple, list)):
                    try:
                        teams[t][u] = pd.date_range(*teams[t][u])
                    except BaseException:
                        raise Exception('Error converting "{}" to '
                                        'pandas.date_range'.format(teams[t][u]))
                elif isinstance(teams[t][u],
                                pd.core.indexes.datetimes.DatetimeIndex):
                    pass
                else:
                    raise TypeError('Expected user dates to be either None, '
                                    'tuple of datetimes or pandas.date_range, '
                                    'got {}'.format(type(teams[t][u])))
        else:
            raise TypeError('Expected teams to be either lists or dicts of '
                            'users, got {}'.format(type(teams[t])))

    # Get all users
    all_users = [u for t in teams for u in teams[t]]

    # Prepare neurons - download if necessary
    if not isinstance(neurons, type(None)):
        if isinstance(neurons, core.CatmaidNeuron):
            neurons = core.CatmaidNeuronList(neurons)
        elif isinstance(neurons, core.CatmaidNeuronList):
            pass
        else:
            neurons = fetch.get_neurons(neurons)
    else:
        all_dates = [d.date() for t in teams for u in teams[t]
                     for d in teams[t][u]]
        neurons = fetch.find_neurons(users=all_users,
                                     from_date=min(all_dates),
                                     to_date=max(all_dates))
        neurons.get_skeletons()

    # Get user list
    user_list = fetch.get_user_list(remote_instance).set_index('login')

    for u in all_users:
        if u not in user_list.index:
            raise ValueError('User "{}" not found in user list'.format(u))

    # Get all node details
    all_node_details = fetch.get_node_details(neurons,
                                              remote_instance=remote_instance)

    # Get connector links
    link_details = fetch.get_connector_links(neurons)

    # link_details contains all links. We have to subset this to existing
    # connectors in case the input neurons have been pruned
    link_details = link_details[link_details.connector_id.isin(
        neurons.connectors.connector_id.values)]

    interval = 3
    bin_width = '%iMin' % interval
    minimum_actions = 10 * interval

    stats = []
    for n in config.tqdm(neurons, desc='Processing',
                         disable=config.pbar_hide, leave=config.pbar_leave):
        # Get node details
        tn_ids = n.nodes.treenode_id.values.astype(str)
        cn_ids = n.connectors.connector_id.values.astype(str)

        current_status = config.pbar_hide
        config.pbar_hide = True
        node_details = all_node_details[
            all_node_details.node_id.isin(np.append(tn_ids, cn_ids))]
        config.pbar_hide = current_status

        # Extract node creation
        node_creation = node_details.loc[node_details.node_id.isin(tn_ids),
                                         ['creator', 'creation_time']].values
        node_creation = np.c_[node_creation,
                              ['node_creation'] * node_creation.shape[0]]

        # Extract connector creation
        cn_creation = node_details.loc[node_details.node_id.isin(cn_ids),
                                       ['creator', 'creation_time']].values
        cn_creation = np.c_[cn_creation,
                            ['cn_creation'] * cn_creation.shape[0]]

        # Extract edition times (treenodes + connectors)
        node_edits = node_details.loc[:, ['editor', 'edition_time']].values
        node_edits = np.c_[node_edits, ['editor'] * node_edits.shape[0]]

        # Link creation. Note: the label column must match the number of
        # *matched* links, not the full link_details table
        link_creation = link_details.loc[link_details.connector_id.isin(cn_ids),
                                         ['creator_id', 'creation_time']].values
        link_creation = np.c_[link_creation,
                              ['link_creation'] * link_creation.shape[0]]

        # Extract review times
        reviewers = [u for l in node_details.reviewers.values for u in l]
        timestamps = [ts for l in node_details.review_times.values for ts in l]
        node_review = np.c_[reviewers, timestamps,
                            ['review'] * len(reviewers)]

        # Merge all timestamps (ignore edits for now) to get time_invested
        all_ts = pd.DataFrame(np.vstack([node_creation,
                                         node_review,
                                         cn_creation,
                                         link_creation,
                                         node_edits]),
                              columns=['user', 'timestamp', 'type'])

        # Add column with just the date and make it the index
        all_ts['date'] = [v.date() for v in
                          all_ts.timestamp.astype(datetime.date).values]
        all_ts.index = pd.to_datetime(all_ts.date)

        # Fill in teams for each timestamp based on user + date
        all_ts['team'] = None
        for t in teams:
            for u in teams[t]:
                # Assign all timestamps by this user in the right time
                # window to this team
                existing_dates = (teams[t][u] & all_ts.index).unique()
                ss = (all_ts.index.isin(existing_dates)) & \
                     (all_ts.user.values == user_list.loc[u, 'id'])
                all_ts.loc[ss, 'team'] = t

        # Get total time invested
        total_time = sum(all_ts.timestamp.to_frame().set_index(
            'timestamp', drop=False).groupby(pd.Grouper(
                freq=bin_width)).count().values >= minimum_actions)[0] * interval

        this_neuron = [n.skeleton_id, n.n_nodes, n.n_connectors,
                       node_review.shape[0], total_time]

        # Go over the teams and collect values
        for t in teams:
            # Subset to team
            this_team = all_ts[all_ts.team == t]
            if this_team.shape[0] > 0:
                team_time = sum(this_team.timestamp.to_frame().set_index(
                    'timestamp', drop=False).groupby(pd.Grouper(
                        freq=bin_width)).count().values >= minimum_actions)[0] * interval
                team_nodes = this_team[this_team['type'] == 'node_creation'].shape[0]
                team_cn = this_team[this_team['type'] == 'cn_creation'].shape[0]
                team_rev = this_team[this_team['type'] == 'review'].shape[0]
            else:
                team_nodes = team_cn = team_rev = team_time = 0

            this_neuron += [team_nodes, team_cn, team_rev, team_time]

        stats.append(this_neuron)

    cols = ['skeleton_id', 'total_nodes', 'total_connectors',
            'total_reviews', 'total_time']
    for t in teams:
        for s in ['nodes', 'connectors', 'reviews', 'time']:
            cols += ['{}_{}'.format(t, s)]

    stats = pd.DataFrame(stats, columns=cols)

    cols_ordered = ['skeleton_id'] + ['{}_{}'.format(t, v)
                                      for v in ['nodes', 'connectors',
                                                'reviews', 'time']
                                      for t in ['total'] + list(teams)]
    stats = stats[cols_ordered]

    return stats
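
# A small sketch of the team/date bookkeeping used above: every user is
# normalised to a pandas.date_range, and a timestamp counts for a team only
# if its date falls into that range. Names and dates are made up.
def _demo_team_date_ranges():
    import datetime
    import pandas as pd

    teams = {'teamA': {'user1': (datetime.date(2017, 1, 1),
                                 datetime.date(2018, 1, 1)),
                       'user2': None}}

    all_time = pd.date_range(datetime.date(1900, 1, 1),
                             datetime.date.today())
    for t in teams:
        for u in teams[t]:
            if teams[t][u] is None:
                teams[t][u] = all_time
            elif isinstance(teams[t][u], tuple):
                teams[t][u] = pd.date_range(*teams[t][u])

    # Membership test mirrors the date intersection done in the function
    ts = pd.Timestamp('2017-6-15')
    assert ts.normalize() in teams['teamA']['user1']
    assert ts.normalize() in teams['teamA']['user2']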
def get_time_invested(x, remote_instance=None, minimum_actions=10,
                      treenodes=True, connectors=True, mode='SUM',
                      max_inactive_time=3, start_date=None, end_date=None):
    """Take a list of neurons and calculate the time individual users have
    spent working on this set of neurons.

    Parameters
    ----------
    x                   Which neurons to check. Can be either:

                        1. skeleton IDs (int or str)
                        2. neuron name (str, must be exact match)
                        3. annotation: e.g. 'annotation:PN right'
                        4. CatmaidNeuron or CatmaidNeuronList object

                        If you pass a CatmaidNeuron/List, its data is used
                        to calculate time invested. You can exploit this to
                        get time invested into a given compartment of a
                        neuron, e.g. by pruning it to a volume.
    remote_instance :   CatmaidInstance, optional
                        Either pass explicitly or define globally.
    minimum_actions :   int, optional
                        Minimum number of actions per minute to be counted
                        as active.
    treenodes :         bool, optional
                        If False, treenodes will not be taken into account.
    connectors :        bool, optional
                        If False, connectors will not be taken into account.
    mode :              'SUM' | 'OVER_TIME' | 'ACTIONS', optional
                        (1) 'SUM' will return total time invested
                            (in minutes) per user.
                        (2) 'OVER_TIME' will return minutes invested/day
                            over time.
                        (3) 'ACTIONS' will return actions (nodes/connectors
                            placed/edited) per day.
    max_inactive_time : int, optional
                        Maximal time inactive in minutes.
    start_date :        None | tuple | datetime.date, optional
    end_date :          None | tuple | datetime.date, optional
                        Restricts time invested to window. Applies to
                        creation but not edition time!

    Returns
    -------
    pandas.DataFrame
        If ``mode='SUM'``, values represent minutes invested:

        >>> df
        ...       total  creation  edition  review
        ... user1
        ... user2

        If ``mode='OVER_TIME'`` or ``mode='ACTIONS'``:

        >>> df
        ...       date  date  date ...
        ... user1
        ... user2

        For ``OVER_TIME``, values represent minutes invested on that day.
        For ``ACTIONS``, values represent actions (creation, edition,
        review) on that day.

    Important
    ---------
    Creation/Edition/Review times can overlap! This is why total time
    spent is not just creation + edition + review.

    Please note that this does currently not take placement of
    pre-/postsynaptic nodes into account!

    Be aware of the ``minimum_actions`` parameter: at low settings even a
    single action (e.g. connecting a node) will add considerably to time
    invested. To keep total reconstruction time comparable to what CATMAID
    calculates, you should consider about 10 actions/minute (= a click
    every 6 seconds) and a ``max_inactive_time`` of 3 mins.

    CATMAID gives reconstruction time across all users. Here, we calculate
    the time spent tracing for individuals. This may lead to a discrepancy
    between the sum of time invested over all users from this function vs.
    CATMAID's reconstruction time.

    Examples
    --------
    Plot pie chart of contributions per user using Plotly. This example
    assumes that you have already imported and set up pymaid.

    >>> import plotly
    >>> stats = pymaid.get_time_invested(skids, remote_instance)
    >>> # Use plotly to generate pie chart
    >>> fig = {"data": [{"values": stats.total.tolist(),
    ...                  "labels": stats.user.tolist(),
    ...                  "type": "pie"}]}
    >>> plotly.offline.plot(fig)

    Plot reconstruction efforts over time:

    >>> stats = pymaid.get_time_invested(skids, mode='OVER_TIME')
    >>> # Plot time invested over time
    >>> stats.T.plot()
    >>> # Plot cumulative time invested over time
    >>> stats.T.cumsum(axis=0).plot()
    >>> # Filter for major contributors
    >>> stats[stats.sum(axis=1) > 20].T.cumsum(axis=0).plot()
    """

    def _extract_timestamps(ts, desc='Calc'):
        grouped = ts.set_index('timestamp', drop=False).groupby(
            ['user', pd.Grouper(freq=bin_width)]).count() >= minimum_actions
        temp_stats = {}
        for u in config.tqdm(set(ts.user.unique()) & set(relevant_users),
                             desc=desc, disable=config.pbar_hide,
                             leave=False):
            temp_stats[u] = sum(grouped.loc[u].values)[0] * interval
        return temp_stats

    if mode not in ['SUM', 'OVER_TIME', 'ACTIONS']:
        raise ValueError('Unknown mode "%s"' % str(mode))

    remote_instance = utils._eval_remote_instance(remote_instance)

    skids = utils.eval_skids(x, remote_instance)

    # Maximal inactive time is simply translated into binning:
    # we need this later for pandas.Grouper() anyway
    interval = max_inactive_time
    bin_width = '%iMin' % interval

    # Update minimum_actions to reflect actions/interval instead of
    # actions/minute
    minimum_actions *= interval

    user_list = fetch.get_user_list(remote_instance).set_index('id')

    if not isinstance(x, (core.CatmaidNeuron, core.CatmaidNeuronList)):
        x = fetch.get_neuron(skids, remote_instance=remote_instance)

    if isinstance(x, core.CatmaidNeuron):
        skdata = core.CatmaidNeuronList(x)
    elif isinstance(x, core.CatmaidNeuronList):
        skdata = x

    if not isinstance(end_date, (datetime.date, type(None))):
        end_date = datetime.date(*end_date)
    if not isinstance(start_date, (datetime.date, type(None))):
        start_date = datetime.date(*start_date)

    # Extract connector and node IDs
    node_ids = []
    connector_ids = []
    for n in skdata.itertuples():
        if treenodes:
            node_ids += n.nodes.treenode_id.tolist()
        if connectors:
            connector_ids += n.connectors.connector_id.tolist()

    # Get node details
    node_details = fetch.get_node_details(node_ids + connector_ids,
                                          remote_instance=remote_instance)

    # Get details for links
    link_details = fetch.get_connector_links(skdata)

    # link_details contains all links. We have to subset this to existing
    # connectors in case the input neurons have been pruned
    link_details = link_details[link_details.connector_id.isin(connector_ids)]

    # Remove timestamps outside of date range (if provided)
    if start_date:
        node_details = node_details[node_details.creation_time >=
                                    np.datetime64(start_date)]
        link_details = link_details[link_details.creation_time >=
                                    np.datetime64(start_date)]
    if end_date:
        node_details = node_details[node_details.creation_time <=
                                    np.datetime64(end_date)]
        link_details = link_details[link_details.creation_time <=
                                    np.datetime64(end_date)]

    # Dataframe for creation (i.e. the actual generation of the nodes)
    creation_timestamps = np.append(
        node_details[['creator', 'creation_time']].values,
        link_details[['creator_id', 'creation_time']].values,
        axis=0)
    creation_timestamps = pd.DataFrame(creation_timestamps,
                                       columns=['user', 'timestamp'])

    # Dataframe for edition times - can't use links as there is no editor
    edition_timestamps = node_details[['editor', 'edition_time']]
    edition_timestamps.columns = ['user', 'timestamp']

    # Generate dataframe for reviews
    reviewers = [u for l in node_details.reviewers.values for u in l]
    timestamps = [ts for l in node_details.review_times.values for ts in l]
    review_timestamps = pd.DataFrame([[u, ts] for u, ts in
                                      zip(reviewers, timestamps)],
                                     columns=['user', 'timestamp'])

    # Merge all timestamps
    all_timestamps = pd.concat([creation_timestamps,
                                edition_timestamps,
                                review_timestamps],
                               axis=0)
    all_timestamps.sort_values('timestamp', inplace=True)

    relevant_users = all_timestamps.groupby('user').count()
    relevant_users = relevant_users[relevant_users.timestamp >=
                                    minimum_actions].index.values

    if mode == 'SUM':
        stats = {
            'total': {u: 0 for u in relevant_users},
            'creation': {u: 0 for u in relevant_users},
            'edition': {u: 0 for u in relevant_users},
            'review': {u: 0 for u in relevant_users}
        }
        stats['total'].update(_extract_timestamps(all_timestamps,
                                                  desc='Calc total'))
        stats['creation'].update(_extract_timestamps(creation_timestamps,
                                                     desc='Calc creation'))
        stats['edition'].update(_extract_timestamps(edition_timestamps,
                                                    desc='Calc edition'))
        stats['review'].update(_extract_timestamps(review_timestamps,
                                                   desc='Calc review'))

        return pd.DataFrame([[user_list.loc[u, 'login'],
                              stats['total'][u],
                              stats['creation'][u],
                              stats['edition'][u],
                              stats['review'][u]] for u in relevant_users],
                            columns=['user', 'total', 'creation',
                                     'edition', 'review']
                            ).sort_values('total', ascending=False
                            ).reset_index(drop=True).set_index('user')

    elif mode == 'ACTIONS':
        all_ts = all_timestamps.set_index(
            'timestamp', drop=False).timestamp.groupby(
                pd.Grouper(freq='1d')).count().to_frame()
        all_ts.columns = ['all_users']
        all_ts = all_ts.T
        # Get actions per day for each user
        for u in config.tqdm(all_timestamps.user.unique(),
                             desc='Calc. total', disable=config.pbar_hide,
                             leave=False):
            this_ts = all_timestamps[all_timestamps.user == u].set_index(
                'timestamp', drop=False).timestamp.groupby(
                    pd.Grouper(freq='1d')).count().to_frame()
            this_ts.columns = [user_list.loc[u, 'login']]

            all_ts = pd.concat([all_ts, this_ts.T])

        return all_ts.fillna(0)

    elif mode == 'OVER_TIME':
        # First count events in all bins
        minutes_counting = (all_timestamps.set_index(
            'timestamp', drop=False).timestamp.groupby(
                pd.Grouper(freq=bin_width)).count().to_frame() > minimum_actions)
        # Then remove the bins that have fewer than the minimum actions
        minutes_counting = minutes_counting[minutes_counting.timestamp == True]
        # Now group by day
        all_ts = minutes_counting.groupby(pd.Grouper(freq='1d')).count()
        all_ts.columns = ['all_users']
        all_ts = all_ts.T
        # Get time invested per day for each user
        for u in config.tqdm(all_timestamps.user.unique(),
                             desc='Calc. total', disable=config.pbar_hide,
                             leave=False):
            minutes_counting = (all_timestamps[all_timestamps.user == u].set_index(
                'timestamp', drop=False).timestamp.groupby(
                    pd.Grouper(freq=bin_width)).count().to_frame() > minimum_actions)
            minutes_counting = minutes_counting[minutes_counting.timestamp == True]
            this_ts = minutes_counting.groupby(pd.Grouper(freq='1d')).count()

            this_ts.columns = [user_list.loc[u, 'login']]

            all_ts = pd.concat([all_ts, this_ts.T])

        all_ts.fillna(0, inplace=True)

        return all_ts
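
# A minimal sketch of the time-binning logic above: timestamps are grouped
# into ``max_inactive_time``-minute bins and a bin only counts towards time
# invested if it contains at least ``minimum_actions`` events. The
# timestamps below are made up.
def _demo_time_binning():
    import pandas as pd

    interval = 3                      # bin width in minutes
    bin_width = '%iMin' % interval
    minimum_actions = 10 * interval   # i.e. 10 actions/minute

    # 40 actions within 2 minutes -> a single "active" 3-minute bin
    ts = pd.DataFrame({'timestamp': pd.date_range('2018-01-01 10:00',
                                                  periods=40, freq='3s')})

    binned = ts.set_index('timestamp', drop=False).timestamp.groupby(
        pd.Grouper(freq=bin_width)).count()

    minutes_invested = sum(binned.values >= minimum_actions) * interval
    assert minutes_invested == 3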
def get_user_contributions(x, teams=None, remote_instance=None):
    """Take a list of neurons and return nodes and synapses contributed by
    each user.

    Notes
    -----
    This is essentially a wrapper for
    :func:`pymaid.get_contributor_statistics` - if you are also interested
    in e.g. construction time, review time, etc. you may want to consider
    using :func:`~pymaid.get_contributor_statistics` instead.

    Parameters
    ----------
    x                   Which neurons to check. Can be either:

                        1. skeleton IDs (int or str)
                        2. neuron name (str, must be exact match)
                        3. annotation: e.g. 'annotation:PN right'
                        4. CatmaidNeuron or CatmaidNeuronList object
    teams               dict, optional
                        Teams to group contributions for. Users must be
                        logins::

                            {'teamA': ['user1', 'user2'],
                             'teamB': ['user3'], ...}

                        Users not part of any team will be grouped as team
                        ``'others'``.
    remote_instance :   CatmaidInstance, optional
                        Either pass explicitly or define globally.

    Returns
    -------
    pandas.DataFrame
        DataFrame in which each row represents a user:

        >>> df
        ...   user  nodes  presynapses  postsynapses  nodes_reviewed
        ... 0
        ... 1

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> # Get contributors for a single neuron
    >>> cont = pymaid.get_user_contributions(2333007)
    >>> # Get top 10 (by node contribution)
    >>> top10 = cont.iloc[:10].set_index('user')
    >>> # Plot as bar chart
    >>> ax = top10.plot(kind='bar')
    >>> plt.show()

    >>> # Plot relative contributions
    >>> cont = pymaid.get_user_contributions(2333007)
    >>> cont = cont.set_index('user')
    >>> # Normalise
    >>> cont_rel = cont / cont.sum(axis=0).values
    >>> # Plot contributors with >5% node contributions
    >>> ax = cont_rel[cont_rel.nodes > .05].plot(kind='bar')
    >>> plt.show()

    See Also
    --------
    :func:`~pymaid.get_contributor_statistics`
            Gives you more basic info on neurons of interest such as total
            reconstruction/review time.
    """
    if not isinstance(teams, type(None)):
        # Prepare teams
        if not isinstance(teams, dict):
            raise TypeError('Expected teams of type dict, got '
                            '{}'.format(type(teams)))
        for t in teams:
            if not isinstance(teams[t], list):
                raise TypeError('Teams need to be lists of user logins, '
                                'got {}'.format(type(teams[t])))
        # Turn teams into a login -> team dict
        teams = {u: t for t in teams for u in teams[t]}

    remote_instance = utils._eval_remote_instance(remote_instance)

    skids = utils.eval_skids(x, remote_instance)

    cont = fetch.get_contributor_statistics(skids, remote_instance,
                                            separate=False)

    all_users = set(list(cont.node_contributors.keys()) +
                    list(cont.pre_contributors.keys()) +
                    list(cont.post_contributors.keys()))

    stats = {
        'nodes': {u: 0 for u in all_users},
        'presynapses': {u: 0 for u in all_users},
        'postsynapses': {u: 0 for u in all_users},
        'nodes_reviewed': {u: 0 for u in all_users}
    }

    for u in cont.node_contributors:
        stats['nodes'][u] = cont.node_contributors[u]
    for u in cont.pre_contributors:
        stats['presynapses'][u] = cont.pre_contributors[u]
    for u in cont.post_contributors:
        stats['postsynapses'][u] = cont.post_contributors[u]
    for u in cont.review_contributors:
        stats['nodes_reviewed'][u] = cont.review_contributors[u]

    stats = pd.DataFrame([[u,
                           stats['nodes'][u],
                           stats['presynapses'][u],
                           stats['postsynapses'][u],
                           stats['nodes_reviewed'][u]] for u in all_users],
                         columns=['user', 'nodes', 'presynapses',
                                  'postsynapses', 'nodes_reviewed']
                         ).sort_values('nodes', ascending=False
                         ).reset_index(drop=True)

    if isinstance(teams, type(None)):
        return stats

    stats['team'] = [teams.get(u, 'others') for u in stats.user.values]
    return stats.groupby('team').sum()
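
# Sketch of the final grouping step above: a {team: [logins]} dict is
# inverted into a {login: team} lookup and the per-user table is collapsed
# with a groupby().sum(). The data is made up for illustration.
def _demo_team_grouping():
    import pandas as pd

    teams = {'teamA': ['user1', 'user2'], 'teamB': ['user3']}
    login_to_team = {u: t for t in teams for u in teams[t]}

    stats = pd.DataFrame({'user': ['user1', 'user2', 'user3', 'user4'],
                          'nodes': [100, 50, 25, 10]})
    stats['team'] = [login_to_team.get(u, 'others')
                     for u in stats.user.values]

    grouped = stats.groupby('team')[['nodes']].sum()
    assert grouped.loc['teamA', 'nodes'] == 150
    assert grouped.loc['others', 'nodes'] == 10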
def in_volume(x, volume, inplace=False, mode='IN', remote_instance=None):
    """Test if points/neurons are within a given CATMAID volume.

    Important
    ---------
    This function requires `pyoctree <https://github.com/mhogg/pyoctree>`_
    which is an optional dependency of PyMaid. If pyoctree is not
    installed, we will fall back to using scipy's ConvexHull instead of
    ray casting. This is slower and may give false positives for concave
    meshes!

    Parameters
    ----------
    x :                 list of tuples | CatmaidNeuron | CatmaidNeuronList

                        - if ``list/numpy.array``: needs to be shape (N,3):
                          ``[[x1,y1,z1], [x2,y2,z2], ..]``
                        - if ``pandas.DataFrame``: needs to have ``x,y,z``
                          columns

    volume :            str | list of str | core.Volume
                        :class:`pymaid.Volume` or name of a CATMAID volume
                        to test.
    inplace :           bool, optional
                        If False, a copy of the original DataFrames/Neuron
                        is returned. Only applies to CatmaidNeuron or
                        CatmaidNeuronList objects. Ignored if multiple
                        volumes are provided.
    mode :              'IN' | 'OUT', optional
                        If 'IN', parts of the neuron that are within the
                        volume are kept.
    remote_instance :   CATMAID instance, optional
                        Pass if ``volume`` is a volume name.

    Returns
    -------
    CatmaidNeuron
                      If input is CatmaidNeuron or CatmaidNeuronList, will
                      return subset of the neuron (nodes and connectors)
                      that are within given volume.
    list of bools
                      If input is list or DataFrame, returns boolean:
                      ``True`` if in volume, ``False`` if not.
    dict
                      If multiple volumes are provided as list of strings,
                      results will be returned in a dictionary with volumes
                      as keys.

    Examples
    --------
    Advanced example: Check with which antennal lobe glomeruli a neuron
    intersects.

    >>> # First prepare some volume names
    >>> gloms = ['DA1', 'DA2', 'DA3', 'DA4l', 'DL4', 'VA2', 'DC3', 'VM7v',
    ...          'DC4', 'DC1', 'DM5', 'D', 'VM2', 'VC4', 'VL1', 'DM3',
    ...          'DL1', 'DP1m']
    >>> # Get neuron to check
    >>> n = pymaid.get_neuron('name:PN unknown glomerulus',
    ...                       remote_instance=remote_instance)
    >>> # Calc intersections with each of the above glomeruli
    >>> res = pymaid.in_volume(n, gloms, remote_instance=remote_instance)
    >>> # Extract cable
    >>> cable = {v: res[v].cable_length for v in res}
    >>> # Plot graph
    >>> import pandas as pd
    >>> import matplotlib.pyplot as plt
    >>> df = pd.DataFrame(list(cable.values()),
    ...                   index=list(cable.keys()))
    >>> df.boxplot()
    >>> plt.show()
    """
    remote_instance = utils._eval_remote_instance(remote_instance)

    if isinstance(volume, (list, dict, np.ndarray)) and \
            not isinstance(volume, core.Volume):
        # Turn into dict. Volume names (strings) are kept as-is - they are
        # resolved by fetch.get_volume() in the recursive call below
        if not isinstance(volume, dict):
            volume = {v if isinstance(v, str) else v.name: v for v in volume}

        data = dict()
        for v in config.tqdm(volume, desc='Volumes',
                             disable=config.pbar_hide,
                             leave=config.pbar_leave):
            data[v] = in_volume(x, volume[v],
                                remote_instance=remote_instance,
                                inplace=False, mode=mode)
        return data

    if isinstance(volume, str):
        volume = fetch.get_volume(volume, remote_instance)

    if isinstance(x, pd.DataFrame):
        points = x[['x', 'y', 'z']].values

    elif isinstance(x, core.CatmaidNeuron):
        in_v = in_volume(x.nodes[['x', 'y', 'z']].values, volume, mode=mode)

        # If mode is OUT, invert selection
        if mode == 'OUT':
            in_v = ~np.array(in_v)

        x = graph_utils.subset_neuron(x,
                                      x.nodes[in_v].treenode_id.values,
                                      inplace=inplace)

        if not inplace:
            return x
        else:
            return

    elif isinstance(x, core.CatmaidNeuronList):
        for n in x:
            n = in_volume(n, volume, inplace=inplace, mode=mode)

        if not inplace:
            return x
        else:
            return
    else:
        points = x

    if points.ndim != 2 or points.shape[1] != 3:
        raise ValueError('Points must be array of shape (N,3).')

    if pyoctree:
        return _in_volume_ray(points, volume)
    else:
        logger.warning('Package pyoctree not found. '
                       'Falling back to ConvexHull.')
        return _in_volume_convex(points, volume, approximate=False)
def watch_network(x, sleep=3, n_circles=1, min_pre=2, min_post=2,
                  layout=None, remote_instance=None, verbose=True):
    """Load and **continuously update** a network into Cytoscape.

    Use CTRL-C to stop.

    Parameters
    ----------
    x :                 skeleton IDs | CatmaidNeuron/List
                        Seed neurons to keep track of.
    sleep :             int | None, optional
                        Time in seconds to sleep after each update.
    n_circles :         int, optional
                        Number of circles around seed neurons to include
                        in the network. See also
                        :func:`pymaid.get_nth_partners`. Set to
                        ``None | 0 | False`` to only update seed nodes.
    min_pre/min_post :  int, optional
                        Synapse threshold to apply to ``n_circles``. Set
                        to -1 to not get any pre-/postsynaptic partners.
                        Please note: as long as there is a single
                        above-threshold connection, a neuron will be
                        included. This does not remove other,
                        sub-threshold connections.
    layout :            str | None, optional
                        Name of a Cytoscape layout. If provided, will
                        update the network's layout on every change.
    remote_instance :   CatmaidInstance, optional
    verbose :           bool, optional
                        If True, will log changes made to the network.

    Returns
    -------
    Nothing

    Examples
    --------
    >>> import pymaid
    >>> import pymaid.cytoscape as cytomaid
    >>> rm = pymaid.CatmaidInstance('server_url', 'http_user',
    ...                             'http_pw', 'auth_token')
    >>> # Don't forget to start Cytoscape!
    >>> cytomaid.watch_network('annotation:glomerulus DA1', min_pre=5,
    ...                        min_post=-1, sleep=5)
    >>> # Use CTRL-C to stop the loop
    """
    cy = get_client()

    remote_instance = utils._eval_remote_instance(remote_instance)

    sleep = 0 if not sleep else sleep

    x = utils.eval_skids(x, remote_instance=remote_instance)

    # Generate the initial network
    if n_circles:
        to_add = fetch.get_nth_partners(x, n_circles=n_circles,
                                        min_pre=min_pre, min_post=min_post,
                                        remote_instance=remote_instance
                                        ).skeleton_id
    else:
        to_add = []
    g = graph.network2nx(np.concatenate([x, to_add]).astype(int),
                         remote_instance=remote_instance)
    network = generate_network(g, clear_session=True, apply_style=False,
                               layout=layout)

    if layout:
        cy.layout.apply(name=layout, network=network)

    logger.info('Watching network. Use CTRL-C to stop.')

    if remote_instance.caching:
        logger.warning('Caching disabled.')
        remote_instance.caching = False

    utils.set_loggers('WARNING')

    while True:
        if n_circles:
            to_add = fetch.get_nth_partners(x, n_circles=n_circles,
                                            min_pre=min_pre,
                                            min_post=min_post,
                                            remote_instance=remote_instance
                                            ).skeleton_id
        else:
            to_add = []
        g = graph.network2nx(np.concatenate([x, to_add]).astype(int),
                             remote_instance=remote_instance)

        # Add nodes that came in new
        ntable = network.get_node_table()
        nodes_to_add = [s for s in g.nodes if s not in ntable.id.values]
        if nodes_to_add:
            network.add_nodes(nodes_to_add)

        # Update neuron names
        ntable = network.get_node_table()
        names = ntable.set_index('name').neuron_name.to_dict()
        names.update({s: g.nodes[s]['neuron_name'] for s in g.nodes})
        ntable['id'] = ntable.name
        ntable['neuron_name'] = ntable.name.map(names)
        network.update_node_table(ntable,
                                  data_key_col='name',
                                  network_key_col='name')

        # Remove nodes that do not exist anymore
        ntable = network.get_node_table()
        nodes_to_remove = ntable[~ntable['id'].isin(g.nodes)]
        if not nodes_to_remove.empty:
            for v in nodes_to_remove.SUID.values:
                network.delete_node(v)

        # Remove edges
        etable = network.get_edge_table()
        edges_removed = 0
        for e in etable.itertuples():
            if (e.source, e.target) not in g.edges:
                edges_removed += 1
                network.delete_edge(e.SUID)

        # Add edges
        etable = network.get_edge_table()
        edges = [(s, t) for s, t in zip(etable.source.values,
                                        etable.target.values)]
        skid_to_SUID = ntable.set_index('name').SUID.to_dict()
        edges_to_add = []
        for e in set(g.edges) - set(edges):
            edges_to_add.append({'source': skid_to_SUID[e[0]],
                                 'target': skid_to_SUID[e[1]],
                                 'interaction': None,
                                 'directed': True})
        if edges_to_add:
            network.add_edges(edges_to_add)

        # Fix table and modify weights if applicable
        etable = network.get_edge_table()

        if not etable.loc[etable.source.isnull()].empty:
            etable.loc[etable.source.isnull(), 'source'] = etable.loc[
                etable.source.isnull(), 'name'].map(
                    lambda x: x[:x.index('(') - 1])
            etable.loc[etable.target.isnull(), 'target'] = etable.loc[
                etable.target.isnull(), 'name'].map(
                    lambda x: x[x.index(')') + 2:])

        new_weights = [g.edges[e]['weight']
                       for e in etable[['source', 'target']].values]
        weights_modified = [new_w for new_w, old_w in
                            zip(new_weights, etable.weight.values)
                            if new_w != old_w]
        etable['weight'] = new_weights

        # For some reason there is no official wrapper for this, so we
        # have to get our hands dirty
        network._CyNetwork__update_table('edge', etable,
                                         network_key_col='SUID',
                                         data_key_col='SUID')

        # If changes were made, give some feedback and/or change layout
        if nodes_to_add or not nodes_to_remove.empty or edges_to_add \
                or edges_removed or weights_modified:
            if verbose:
                logger.info('{} - nodes added/removed: {}/{}; edges '
                            'added/removed/modified: {}/{}/{}'.format(
                                datetime.datetime.now(),
                                len(nodes_to_add),
                                len(nodes_to_remove),
                                len(edges_to_add),
                                edges_removed,
                                len(weights_modified)))
            if layout:
                cy.layout.apply(name=layout, network=network)

        # ZzzZzzzZ
        time.sleep(sleep)
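
# A condensed sketch of the diffing the watcher above performs on every
# tick: compare the desired graph against what is currently displayed and
# derive the add/remove sets. Plain sets stand in for Cytoscape tables.
def _demo_network_diff():
    desired_nodes = {'1', '2', '3'}
    displayed_nodes = {'2', '3', '4'}

    nodes_to_add = [s for s in desired_nodes if s not in displayed_nodes]
    nodes_to_remove = displayed_nodes - desired_nodes

    desired_edges = {('1', '2'), ('2', '3')}
    displayed_edges = {('2', '3'), ('3', '4')}

    edges_to_add = desired_edges - displayed_edges
    edges_to_remove = displayed_edges - desired_edges

    assert nodes_to_add == ['1']
    assert nodes_to_remove == {'4'}
    assert edges_to_add == {('1', '2')}
    assert edges_to_remove == {('3', '4')}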
def adjacency_matrix(s, t=None, remote_instance=None, source_grp={},
                     target_grp={}, syn_threshold=None, syn_cutoff=None,
                     use_connectors=False):
    """Generate adjacency matrix for synaptic connections between sets of
    neurons. Directional: sources = rows, targets = columns.

    Parameters
    ----------
    s                   Source neurons as single or list of either:

                        1. skeleton IDs (int or str)
                        2. neuron name (str, exact match)
                        3. annotation: e.g. 'annotation:PN right'
                        4. CatmaidNeuron or CatmaidNeuronList object
    t                   Optional. Target neurons as single or list of
                        either:

                        1. skeleton IDs (int or str)
                        2. neuron name (str, exact match)
                        3. annotation: e.g. ``'annotation:PN right'``
                        4. CatmaidNeuron or CatmaidNeuronList object

                        If not provided, ``source neurons = target neurons``.
    remote_instance :   CATMAID instance, optional
    syn_cutoff :        int, optional
                        If set, will cut off connections above given value.
    syn_threshold :     int, optional
                        If set, will ignore connections with fewer synapses.
    source_grp :        dict, optional
                        Use to collapse sources into groups. Can be either:

                        1. ``{group1: [neuron1, neuron2, ...], ...}``
                        2. ``{neuron1: group1, neuron2: group2, ...}``

                        ``syn_cutoff`` and ``syn_threshold`` are applied
                        BEFORE grouping!
    target_grp :        dict, optional
                        See ``source_grp`` for possible formats.
    use_connectors :    bool, optional
                        If True AND ``s`` or ``t`` are
                        ``CatmaidNeuron/List``, restrict adjacency matrix
                        to their connectors. Use if e.g. you are using
                        pruned neurons. **Important**: This does not work
                        if you have multiple fragments per neuron!

    Returns
    -------
    matrix :          ``pandas.DataFrame``

    See Also
    --------
    :func:`~pymaid.group_matrix`
                More fine-grained control over matrix grouping.
    :func:`~pymaid.adjacency_from_connectors`
                Use this function if you are working with multiple
                fragments per neuron.

    Examples
    --------
    Generate and plot an adjacency matrix:

    >>> import seaborn as sns
    >>> import matplotlib.pyplot as plt
    >>> import pymaid
    >>> rm = pymaid.CatmaidInstance(url, user, pw, token)
    >>> neurons = pymaid.get_neurons('annotation:test')
    >>> adj_mat = pymaid.adjacency_matrix(neurons)
    >>> g = sns.heatmap(adj_mat, square=True)
    >>> g.set_yticklabels(g.get_yticklabels(), rotation=0, fontsize=7)
    >>> g.set_xticklabels(g.get_xticklabels(), rotation=90, fontsize=7)
    >>> plt.show()

    Cut neurons into axon and dendrites and compare their connectivity:

    >>> # Get a set of neurons
    >>> nl = pymaid.get_neurons('annotation:type_16_candidates')
    >>> # Split into axon dendrite by using a tag
    >>> nl.reroot(nl.soma)
    >>> nl_axon = nl.prune_proximal_to('axon', inplace=False)
    >>> nl_dend = nl.prune_distal_to('axon', inplace=False)
    >>> # Get a list of the downstream partners
    >>> cn_table = pymaid.get_partners(nl)
    >>> ds_partners = cn_table[cn_table.relation == 'downstream']
    >>> # Take the top 10 downstream partners
    >>> top_ds = ds_partners.iloc[:10].skeleton_id.values
    >>> # Generate separate adjacency matrices for axon and dendrites
    >>> adj_axon = pymaid.adjacency_matrix(nl_axon, top_ds,
    ...                                    use_connectors=True)
    >>> adj_dend = pymaid.adjacency_matrix(nl_dend, top_ds,
    ...                                    use_connectors=True)
    >>> # Rename rows and merge dataframes
    >>> adj_axon.index += '_axon'
    >>> adj_dend.index += '_dendrite'
    >>> adj_merged = pd.concat([adj_axon, adj_dend], axis=0)
    >>> # Plot heatmap using seaborn
    >>> ax = sns.heatmap(adj_merged)
    >>> plt.show()
    """
    remote_instance = utils._eval_remote_instance(remote_instance)

    if t is None:
        t = s

    neuronsA = utils.eval_skids(s, remote_instance=remote_instance)
    neuronsB = utils.eval_skids(t, remote_instance=remote_instance)

    # Make sure neurons are integers
    neurons = list(set([int(n) for n in (neuronsA + neuronsB)]))
    neuronsA = [int(n) for n in neuronsA]
    neuronsB = [int(n) for n in neuronsB]

    # Make sure neurons are unique
    neuronsA = sorted(set(neuronsA), key=neuronsA.index)
    neuronsB = sorted(set(neuronsB), key=neuronsB.index)

    logger.info('Retrieving and filtering connectivity...')

    if use_connectors and (isinstance(s, (core.CatmaidNeuron,
                                          core.CatmaidNeuronList))
                           or isinstance(t, (core.CatmaidNeuron,
                                             core.CatmaidNeuronList))):
        edges = _edges_from_connectors(s, t, remote_instance=remote_instance)
    else:
        edges = fetch.get_edges(neurons, remote_instance=remote_instance)

    # Turn into an adjacency matrix
    matrix = edges.pivot(values='weight',
                         columns='target_skid',
                         index='source_skid').fillna(0)

    # Filter to actual sources and targets
    matrix = matrix.reindex(neuronsA, columns=neuronsB, fill_value=0)

    # Apply cutoff and threshold
    matrix = matrix.clip(upper=syn_cutoff)
    if syn_threshold:
        matrix[matrix < syn_threshold] = 0

    matrix.datatype = 'adjacency_matrix'

    if source_grp or target_grp:
        matrix = group_matrix(matrix, source_grp, target_grp,
                              drop_ungrouped=False)

    logger.info('Finished!')

    return matrix
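
# A minimal sketch of the edge-list -> adjacency-matrix conversion used
# above: pivot, fill gaps with 0, then reindex to enforce row/column order.
# The edge list is made up.
def _demo_edges_to_adjacency():
    import pandas as pd

    edges = pd.DataFrame({'source_skid': [1, 1, 2],
                          'target_skid': [2, 3, 3],
                          'weight': [5, 1, 10]})

    matrix = edges.pivot(values='weight', columns='target_skid',
                         index='source_skid').fillna(0)

    # Enforce the requested source/target order; missing pairs become 0
    matrix = matrix.reindex([1, 2, 3], columns=[1, 2, 3], fill_value=0)

    assert matrix.loc[1, 2] == 5
    assert matrix.loc[3, 1] == 0   # no edges from 3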
def adjacency_from_connectors(source, target=None, remote_instance=None):
    """Regenerate adjacency matrices from neurons' connectors.

    Notes
    -----
    This function creates an adjacency matrix from scratch using just the
    neurons' connectors. This function is able to deal with non-unique
    skeleton IDs (most other functions are not). Use it e.g. when you
    split neurons into multiple fragments.

    Parameters
    ----------
    source,target :     skeleton IDs | CatmaidNeuron | CatmaidNeuronList
                        Neuron(s) for which to generate adjacency matrix.
                        If ``target==None``, will use ``target=source``.

    Returns
    -------
    pandas.DataFrame
            Matrix holding possible synaptic contacts. Sources are rows,
            targets are columns. Labels are skeleton IDs. Order is
            preserved.

            >>> df
                       target1  target2  target3  ...
            source1          5        1        0
            source2         10       20        5
            source3          4        3       15
            ...

    See Also
    --------
    :func:`~pymaid.adjacency_matrix`
                If you are working with "intact" neurons. Much faster!
    :func:`~pymaid.filter_connectivity`
                Use this function if you have only a single fragment per
                neuron (e.g. just the axon). Also way faster.

    Examples
    --------
    >>> # Fetch some neurons
    >>> x = pymaid.get_neuron('annotation:PD2a1/b1')
    >>> # Split into axon / dendrites
    >>> x.reroot(x.soma)
    >>> split = pymaid.split_axon_dendrite(x)
    >>> # Regenerate all-by-all adjacency matrix
    >>> adj = pymaid.adjacency_from_connectors(split)
    >>> # Skeleton IDs are non-unique but column/row order = input order:
    >>> # in this example, the first occurrence is axon, the second dendrites
    >>> adj.head()
    """
    remote_instance = utils._eval_remote_instance(remote_instance)

    if not isinstance(source, (core.CatmaidNeuron, core.CatmaidNeuronList)):
        skids = utils.eval_skids(source, remote_instance=remote_instance)
        source = fetch.get_neuron(skids, remote_instance=remote_instance)

    if isinstance(target, type(None)):
        target = source
    elif not isinstance(target, (core.CatmaidNeuron,
                                 core.CatmaidNeuronList)):
        skids = utils.eval_skids(target, remote_instance=remote_instance)
        target = fetch.get_neuron(skids, remote_instance=remote_instance)

    if isinstance(source, core.CatmaidNeuron):
        source = core.CatmaidNeuronList(source)

    if isinstance(target, core.CatmaidNeuron):
        target = core.CatmaidNeuronList(target)

    # Generate empty adjacency matrix
    adj = np.zeros((len(source), len(target)))

    # Get connector details for all neurons
    all_cn = list(set(np.append(source.connectors.connector_id.values,
                                target.connectors.connector_id.values)))
    cn_details = fetch.get_connector_details(all_cn,
                                             remote_instance=remote_instance)

    # Now go over all source neurons and process connections
    for i, s in enumerate(config.tqdm(source, desc='Processing',
                                      disable=config.pbar_hide,
                                      leave=config.pbar_leave)):
        # Get all connectors presynaptic for this source
        this_cn = cn_details[
            (cn_details.presynaptic_to == int(s.skeleton_id)) &
            (cn_details.connector_id.isin(s.connectors.connector_id))]

        # Go over all target neurons
        for k, t in enumerate(target):
            t_tn = set(t.nodes.treenode_id.values)
            t_post = t.postsynapses.connector_id.values

            # Extract number of connections from source to this target
            this_t = this_cn[this_cn.connector_id.isin(t_post)]

            # Now figure out how many links are between this connector and
            # the target
            n_links = sum([len(t_tn & set(r.postsynaptic_to_node))
                           for r in this_t.itertuples()])

            adj[i][k] = n_links

    return pd.DataFrame(adj,
                        index=source.skeleton_id,
                        columns=target.skeleton_id)
def cn_table_from_connectors(x, remote_instance=None):
    """Generate connectivity table from neurons' connectors.

    Notes
    -----
    This function creates the connectivity table from scratch using just
    the neurons' connectors. This function is able to deal with non-unique
    skeleton IDs (most other functions won't). Use it e.g. when you split
    neurons into multiple fragments. *The order of the input
    CatmaidNeuronList is preserved!*

    Parameters
    ----------
    x :                 CatmaidNeuron | CatmaidNeuronList
                        Neuron(s) for which to generate connectivity table.

    Returns
    -------
    pandas.DataFrame
            DataFrame in which each row represents a neuron and the number
            of synapses with the query neurons:

            >>> df
              neuron_name  skeleton_id  relation    total  skid1  skid2 ...
            0  name1       skid1        upstream    n_syn  n_syn  ...
            1  name2       skid2        downstream  n_syn  n_syn  ...
            2  name3       skid3        upstream    n_syn  n_syn  ...
            ...

            ``relation`` can be ``'upstream'`` (incoming), ``'downstream'``
            (outgoing), ``'attachment'`` or ``'gapjunction'`` (gap
            junction).

    See Also
    --------
    :func:`~pymaid.get_partners`
                If you are working with "intact" neurons. Much faster!
    :func:`~pymaid.filter_connectivity`
                Use this function if you have only a single fragment per
                neuron (e.g. just the axon). Also way faster.

    Examples
    --------
    >>> # Fetch some neurons
    >>> x = pymaid.get_neuron('annotation:PD2a1/b1')
    >>> # Split into axon / dendrites
    >>> x.reroot(x.soma)
    >>> split = pymaid.split_axon_dendrite(x)
    >>> # Regenerate cn_table
    >>> cn_table = pymaid.cn_table_from_connectors(split)
    >>> # Skeleton IDs are non-unique but column order = input order:
    >>> # in this example, the first occurrence is axon, the second dendrites
    >>> cn_table.head()
    """
    remote_instance = utils._eval_remote_instance(remote_instance)

    if not isinstance(x, (core.CatmaidNeuron, core.CatmaidNeuronList)):
        raise TypeError('Need CatmaidNeuron/List, got "{}"'.format(type(x)))

    if isinstance(x, core.CatmaidNeuron):
        x = core.CatmaidNeuronList(x)

    # Get connector details for all neurons
    all_cn = x.connectors.connector_id.values
    cn_details = fetch.get_connector_details(all_cn,
                                             remote_instance=remote_instance)

    # Remove connectors for which there are either no pre- or no
    # postsynaptic neurons
    cn_details = cn_details[cn_details.postsynaptic_to.apply(len) != 0]
    cn_details = cn_details[~cn_details.presynaptic_to.isnull()]

    # We need to map treenode ID to skeleton ID in cases where there are
    # more links (postsynaptic_to_node) than targets (postsynaptic_to)
    multi_links = cn_details[cn_details.postsynaptic_to.apply(len) <
                             cn_details.postsynaptic_to_node.apply(len)]
    if not multi_links.empty:
        tn_to_fetch = [tn for l in multi_links.postsynaptic_to_node
                       for tn in l]
        tn_to_skid = fetch.get_skid_from_treenode(
            tn_to_fetch, remote_instance=remote_instance)
    else:
        tn_to_skid = {}

    # Collect all pre- and postsynaptic neurons
    all_pre = cn_details[~cn_details.presynaptic_to.isin(
        x.skeleton_id.astype(int))]
    all_post = cn_details[cn_details.presynaptic_to.isin(
        x.skeleton_id.astype(int))]

    all_partners = np.append(
        all_pre.presynaptic_to.values,
        [n for l in all_post.postsynaptic_to.values for n in l])

    us_dict = {}
    ds_dict = {}
    # Go over all neurons and process connectivity
    for i, n in enumerate(config.tqdm(x, desc='Processing',
                                      disable=config.pbar_hide,
                                      leave=config.pbar_leave)):
        # First prepare upstream partners:
        # Get all treenodes
        this_tn = set(n.nodes.treenode_id.values)
        # Prepare upstream partners (.copy() avoids setting values on a
        # view of all_pre)
        this_us = all_pre[all_pre.connector_id.isin(
            n.connectors.connector_id.values)].copy()
        # Get the number of all links per connector
        this_us['n_links'] = [len(this_tn & set(r.postsynaptic_to_node))
                              for r in this_us.itertuples()]
        # Group by input and store as dict
        us_dict[n] = this_us.groupby('presynaptic_to').n_links.sum().to_dict()
        this_us = this_us.groupby('presynaptic_to').n_links.sum()

        # Now prepare downstream partners:
        # Get all downstream connectors
        this_ds = all_post[all_post.presynaptic_to == int(n.skeleton_id)]
        # Prepare dict
        ds_dict[n] = {p: 0 for p in all_partners}
        # Easy cases first (single link to target per connector)
        is_single = this_ds.postsynaptic_to.apply(len) >= \
            this_ds.postsynaptic_to_node.apply(len)
        for r in this_ds[is_single].itertuples():
            for s in r.postsynaptic_to:
                ds_dict[n][s] += 1
        # Now hard cases - will have to look up skeleton ID via treenode ID
        for r in this_ds[~is_single].itertuples():
            for s in r.postsynaptic_to_node:
                ds_dict[n][tn_to_skid[s]] += 1

    # Now that we have all data, let's generate the table
    us_table = pd.DataFrame.from_dict(us_dict)
    ds_table = pd.DataFrame.from_dict(ds_dict)

    # Make sure we keep the order of the original neuronlist
    us_table = us_table[[n for n in x]]
    us_table.columns = [n.skeleton_id for n in us_table.columns]
    ds_table = ds_table[[n for n in x]]
    ds_table.columns = [n.skeleton_id for n in ds_table.columns]

    ds_table['relation'] = 'downstream'
    us_table['relation'] = 'upstream'

    # Generate table
    cn_table = pd.concat([us_table, ds_table], axis=0)

    # Replace NaN with 0
    cn_table = cn_table.fillna(0)

    # Make skeleton ID a column
    cn_table = cn_table.reset_index(drop=False)
    cn_table.columns = ['skeleton_id'] + list(cn_table.columns[1:])

    # Add names
    names = fetch.get_names(cn_table.skeleton_id.values,
                            remote_instance=remote_instance)
    cn_table['neuron_name'] = [names[str(s)]
                               for s in cn_table.skeleton_id.values]
    cn_table['total'] = cn_table[x.skeleton_id].sum(axis=1)

    # Drop rows with 0 synapses (e.g. if neuron is only up- but not
    # downstream)
    cn_table = cn_table[cn_table.total > 0]

    # Sort by number of synapses
    cn_table = cn_table.sort_values(['relation', 'total'], ascending=False)

    # Sort columns
    cn_table = cn_table[['neuron_name', 'skeleton_id', 'relation', 'total'] +
                        list(set(x.skeleton_id))]

    return cn_table
def predict_connectivity(source, target, method='possible_contacts',
                         remote_instance=None, **kwargs):
    """Calculate potential synapses from source onto target neurons.

    Based on a concept by Alex Bates.

    Parameters
    ----------
    source,target :     CatmaidNeuron | CatmaidNeuronList
                        Neuron(s) for which to compute potential
                        connectivity. This is unidirectional:
                        source -> target.
    method :            'possible_contacts'
                        Method to use for calculations. See Notes.
    **kwargs
                        1. For method = 'possible_contacts':
                           - ``dist`` to set distance between connectors
                             and treenodes manually.
                           - ``n_std`` to set number of standard
                             deviations of average distance. Default = 2.

    Notes
    -----
    Method ``possible_contacts``:

    1. Calculate mean distance ``d`` (connector->treenode) at which
       connections between neurons A and neurons B occur.
    2. For all presynapses of neurons A, check if they are within
       ``n_std`` (default=2) standard deviations of ``d`` of a neurons B
       treenode.

    Returns
    -------
    pandas.DataFrame
            Matrix holding possible synaptic contacts. Sources are rows,
            targets are columns.

            >>> df
                       target1  target2  target3  ...
            source1          5        1        0
            source2         10       20        5
            source3          4        3       15
            ...
    """
    remote_instance = utils._eval_remote_instance(remote_instance)

    if not remote_instance:
        try:
            remote_instance = source._remote_instance
        except BaseException:
            pass

    for _ in [source, target]:
        if not isinstance(_, (core.CatmaidNeuron, core.CatmaidNeuronList)):
            raise TypeError('Need CatmaidNeuron/List, got '
                            '"{}"'.format(type(_)))

    if isinstance(source, core.CatmaidNeuron):
        source = core.CatmaidNeuronList(source)

    if isinstance(target, core.CatmaidNeuron):
        target = core.CatmaidNeuronList(target)

    allowed_methods = ['possible_contacts']
    if method not in allowed_methods:
        raise ValueError('Unknown method "{0}". Allowed methods: '
                         '"{1}"'.format(method, ','.join(allowed_methods)))

    matrix = pd.DataFrame(np.zeros((source.shape[0], target.shape[0])),
                          index=source.skeleton_id,
                          columns=target.skeleton_id)

    # First let's calculate at what distance synapses are being made
    cn_between = fetch.get_connectors_between(source, target,
                                              remote_instance=remote_instance)

    if kwargs.get('dist', None):
        distances = kwargs.get('dist')
    elif cn_between.shape[0] > 0:
        cn_locs = np.vstack(cn_between.connector_loc.values)
        tn_locs = np.vstack(cn_between.treenode2_loc.values)

        distances = np.sqrt(np.sum((cn_locs - tn_locs) ** 2, axis=1))

        logger.info('Average connector->treenode distances: '
                    '{:.2f} +/- {:.2f} nm'.format(distances.mean(),
                                                  distances.std()))
    else:
        logger.warning('No existing connectors to calculate average '
                       'connector->treenode distance found. Falling back '
                       'to default of 1um. Use <dist> argument to set '
                       'manually.')
        distances = 1000

    # Calculate distance threshold
    n_std = kwargs.get('n_std', 2)
    dist_threshold = np.mean(distances) + n_std * np.std(distances)

    with config.tqdm(total=len(target), desc='Predicting',
                     disable=config.pbar_hide,
                     leave=config.pbar_leave) as pbar:
        for t in target:
            # Create cKDTree for target
            tree = scipy.spatial.cKDTree(t.nodes[['x', 'y', 'z']].values,
                                         leafsize=10)
            for s in source:
                # Query against presynapses
                dist, ix = tree.query(s.presynapses[['x', 'y', 'z']].values,
                                      k=1,
                                      distance_upper_bound=dist_threshold,
                                      n_jobs=-1)

                # Calculate possible contacts
                possible_contacts = sum(dist != float('inf'))

                matrix.at[s.skeleton_id, t.skeleton_id] = possible_contacts

            pbar.update(1)

    return matrix.astype(int)
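
# A minimal sketch of the proximity test above: build a cKDTree over target
# node positions, query the source's presynapse positions with a distance
# cutoff, and count the queries that found a neighbour. Coordinates are
# made up.
def _demo_possible_contacts():
    import numpy as np
    import scipy.spatial

    target_nodes = np.array([[0., 0., 0.], [100., 0., 0.]])
    presynapses = np.array([[10., 0., 0.],      # within threshold
                            [500., 500., 0.]])  # too far away

    tree = scipy.spatial.cKDTree(target_nodes)
    dist, ix = tree.query(presynapses, k=1, distance_upper_bound=50)

    # Queries with no neighbour within the bound return dist = inf
    possible_contacts = sum(dist != float('inf'))
    assert possible_contacts == 1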
def crop_neuron(x, output, dimensions=(1000, 1000), interpolate_z_res=40,
                remote_instance=None):
    """Crop and save EM tiles following a neuron's segments.

    Parameters
    ----------
    x :                 pymaid.CatmaidNeuron
                        Neuron to cut out.
    output :            str
                        File or folder.
    dimensions :        tuple of int, optional
                        Dimensions of square to cut out in nanometers.
    interpolate_z_res : int | None, optional
                        If not None, will interpolate in Z direction to
                        given resolution. Use this to interpolate virtual
                        nodes.
    remote_instance :   pymaid.CatmaidInstance, optional
    """
    if isinstance(x, core.CatmaidNeuronList) and len(x) == 1:
        x = x[0]

    if not isinstance(x, core.CatmaidNeuron):
        raise TypeError('Need a single CatmaidNeuron, got '
                        '"{0}".'.format(type(x)))

    if len(dimensions) != 2:
        raise ValueError('Need two dimensions, got '
                         '{0}'.format(len(dimensions)))

    # Evaluate remote instance
    remote_instance = utils._eval_remote_instance(remote_instance)

    # Prepare treenode table to be indexed by treenode_id
    this_tn = x.nodes.set_index('treenode_id')

    # Iterate over neuron's segments
    bboxes = []
    for seg in x.segments:
        # Get treenode coordinates
        center_coords = this_tn.loc[seg, ['x', 'y', 'z']].values

        # If a z resolution for interpolation is given, interpolate
        # virtual nodes
        if interpolate_z_res:
            interp_coords = center_coords[0:1]
            # Go over all treenode -> parent pairs
            for i, (co, next_co) in enumerate(zip(center_coords[:-1],
                                                  center_coords[1:])):
                # If nodes are more than interpolate_z_res nm away from
                # one another
                if math.fabs(co[2] - next_co[2]) >= (2 * interpolate_z_res):
                    # Get the number of steps we would expect
                    steps = int(math.fabs(co[2] - next_co[2]) /
                                interpolate_z_res)

                    # If we're going anterior, we need to invert step size
                    if co[2] < next_co[2]:
                        step_size = interpolate_z_res
                    else:
                        step_size = -interpolate_z_res

                    # Interpolate coordinates
                    new_co = [(co[0] + int((next_co[0] - co[0]) / steps * (i + 1)),
                               co[1] + int((next_co[1] - co[1]) / steps * (i + 1)),
                               z)
                              for i, z in enumerate(range(co[2] + step_size,
                                                          next_co[2],
                                                          step_size))]

                    # Track new coordinates
                    interp_coords = np.append(interp_coords, new_co, axis=0)

                # Add next coordinate
                interp_coords = np.append(interp_coords, [next_co], axis=0)

            # Use interpolated coords
            center_coords = interp_coords

        # Turn into bounding boxes: left, right, top, bottom, z
        bbox = np.array([[co[0] - dimensions[0] / 2,
                          co[0] + dimensions[0] / 2,
                          co[1] - dimensions[1] / 2,
                          co[1] + dimensions[1] / 2,
                          co[2]]
                         for co in center_coords]).astype(int)
        bboxes += list(bbox)

    # Generate tile job
    job = LoadTiles(bboxes, zoom_level=0, coords='NM',
                    remote_instance=remote_instance)
    # job.generate_img()

    return job
def take_snapshot(x, skeleton_data=True, cn_table=False, node_details=False,
                  adjacency_matrix=True, remote_instance=None,
                  cn_details=True, annotations=False):
    """ Take a snapshot of CATMAID data associated with a set of neurons.

    Important
    ---------
    If you pass a CatmaidNeuron/List that has been modified (e.g. pruned),
    other data (e.g. connectivity, etc.) will be subset as well if applicable.
    If your CatmaidNeuron/List is still naive, you might want to just pass
    the skeleton ID(s) to speed things up.

    Parameters
    ----------
    x :                 skeleton IDs | CatmaidNeuron/List
                        Neurons for which to retrieve data.
    skeleton_data :     bool, optional
                        Include 3D skeleton data.
    cn_table :          bool, optional
                        Include connectivity table. Covers all neurons
                        connected to input neurons.
    node_details :      bool, optional
                        Include treenode and connector details.
    adjacency_matrix :  bool, optional
                        Include adjacency matrix covering the input neurons.
    cn_details :        bool, optional
                        Include connector details.
    annotations :       bool, optional
                        Include neuron annotations.
    remote_instance :   CatmaidInstance, optional
                        Either pass explicitly or define globally. Will
                        obviously not be added to the snapshot!

    Returns
    -------
    pandas Series

    See Also
    --------
    :func:`~pymaid.load_snapshot`
            Use to load a snapshot file.

    """
    remote_instance = utils._eval_remote_instance(remote_instance)

    skids = utils.eval_skids(x, remote_instance=remote_instance)

    snapshot = pd.Series()

    # Add Coordinated Universal Time (UTC) of the snapshot
    snapshot['utc_date'] = datetime.datetime.utcnow()

    # Add pymaid version
    snapshot['pymaid_version'] = init.__version__

    # Fetch skeleton data if requested or if needed downstream (node and
    # connector details are extracted from it)
    if skeleton_data or node_details or cn_details:
        if not isinstance(x, (core.CatmaidNeuronList, core.CatmaidNeuron)):
            skdata = fetch.get_neurons(skids, remote_instance=remote_instance)
        else:
            skdata = x

        if isinstance(skdata, core.CatmaidNeuron):
            skdata = core.CatmaidNeuronList(skdata)

    # Add skeleton data
    if skeleton_data:
        snapshot['skeleton_data'] = skdata.to_dataframe()

    # Add connectivity table
    if cn_table:
        if isinstance(x, (core.CatmaidNeuron, core.CatmaidNeuronList)):
            snapshot['cn_table'] = connectivity.cn_table_from_connectors(
                x, remote_instance=remote_instance)
        else:
            snapshot['cn_table'] = fetch.get_partners(
                x, remote_instance=remote_instance)

    # Add treenode and connector details
    if node_details:
        snapshot['treenode_details'] = fetch.get_node_details(
            skdata.nodes.treenode_id.values, remote_instance=remote_instance)
        snapshot['connector_details'] = fetch.get_node_details(
            skdata.connectors.connector_id.values,
            remote_instance=remote_instance)

    # Add adjacency matrix
    if adjacency_matrix:
        if isinstance(x, (core.CatmaidNeuron, core.CatmaidNeuronList)):
            snapshot['adjacency_matrix'] = connectivity.adjacency_from_connectors(
                x, remote_instance=remote_instance)
        else:
            snapshot['adjacency_matrix'] = connectivity.adjacency_matrix(
                x, remote_instance=remote_instance)

    # Add annotations
    if annotations:
        snapshot['annotations'] = fetch.get_annotations(
            skids, remote_instance=remote_instance)

    # Add connector details
    if cn_details:
        snapshot['cn_details'] = fetch.get_connector_details(
            skdata, remote_instance=remote_instance)

    return snapshot
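# Usage sketch for ``take_snapshot``: snapshot a set of neurons and pickle the
# resulting pandas Series to disk (annotation and file name are hypothetical;
# see ``load_snapshot`` for reading it back):
#
#   snap = take_snapshot('annotation:my neurons', skeleton_data=True,
#                        adjacency_matrix=True, remote_instance=rm)
#   snap.to_pickle('snapshot.pkl')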
def network2igraph(x, remote_instance=None, threshold=1):
    """ Generates iGraph graph for neuron connectivity. Requires iGraph to be
    installed.

    Parameters
    ----------
    x
                        Catmaid Neurons as:
                         1. list of skeleton IDs (int or str)
                         2. list of neuron names (str, exact match)
                         3. annotation(s): e.g. 'annotation:PN right'
                         4. CatmaidNeuronList object
                         5. Adjacency matrix (pd.DataFrame, rows=sources,
                            columns=targets)
    remote_instance :   CATMAID instance, optional
                        Either pass directly to function or define globally
                        as 'remote_instance'.
    threshold :         int, optional
                        Connections weaker than this will be excluded.

    Returns
    -------
    igraph.Graph(directed=True)
                        iGraph representation of the network.

    Examples
    --------
    >>> import pymaid
    >>> import igraph
    >>> g = pymaid.network2igraph('annotation:large network',
    ...                           remote_instance=rm)
    >>> # Plot graph
    >>> igraph.plot(g)
    >>> # Plot with edge width
    >>> igraph.plot(g, **{'edge_width': [w / 10 for w in g.es['weight']]})
    >>> # Plot with edge label
    >>> igraph.plot(g, **{'edge_label': g.es['weight']})
    >>> # Save as graphml to import into e.g. Cytoscape
    >>> g.save('graph.graphml')

    """
    if igraph is None:
        raise ImportError('igraph must be installed to use this function.')

    if isinstance(x, (core.CatmaidNeuronList, list, np.ndarray, str)):
        remote_instance = utils._eval_remote_instance(remote_instance)
        skids = utils.eval_skids(x, remote_instance=remote_instance)

        indices = {int(s): i for i, s in enumerate(skids)}

        # Fetch edges
        edges = fetch.get_edges(skids, remote_instance=remote_instance)

        # Reformat into igraph format
        edges_by_index = [[indices[e.source_skid], indices[e.target_skid]]
                          for e in edges[edges.weight >= threshold].itertuples()]
        weight = edges[edges.weight >= threshold].weight.tolist()
    elif isinstance(x, pd.DataFrame):
        skids = list(set(x.columns.tolist() + x.index.tolist()))

        # Generate edge list
        edges = [[i, j] for i in x.index.tolist()
                 for j in x.columns.tolist() if x.loc[i, j] >= threshold]

        edges_by_index = [[skids.index(e[0]), skids.index(e[1])]
                          for e in edges]
        # Keep weights aligned with the edge list
        weight = [x.loc[i, j] for i, j in edges]
    else:
        raise ValueError('Unable to process data of type "{0}"'.format(type(x)))

    # Generate igraph and assign custom properties
    g = igraph.Graph(directed=True)
    g.add_vertices(len(skids))
    g.add_edges(edges_by_index)

    g.vs['node_id'] = skids
    # g.vs['neuron_name'] = g.vs['label'] = neuron_names
    g.es['weight'] = weight

    return g
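# Usage sketch for ``network2igraph`` with an adjacency matrix as input
# (DataFrame values and skeleton IDs are made up). Edges below the threshold
# are dropped:
#
#   import pandas as pd
#   adj = pd.DataFrame([[0, 5], [2, 0]],
#                      index=['123', '456'], columns=['123', '456'])
#   g = network2igraph(adj, threshold=2)
#   # Edge weights are aligned with the edge list
#   print(g.es['weight'])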
def network2nx(x, remote_instance=None, threshold=1):
    """ Generates NetworkX graph for neuron connectivity.

    Parameters
    ----------
    x
                        Catmaid Neurons as:
                         1. list of skeleton IDs (int or str)
                         2. list of neuron names (str, exact match)
                         3. annotation(s): e.g. 'annotation:PN right'
                         4. CatmaidNeuronList object
                         5. Adjacency matrix (pd.DataFrame, rows=sources,
                            columns=targets)
    remote_instance :   CATMAID instance, optional
                        Either pass directly to function or define globally
                        as 'remote_instance'.
    threshold :         int, optional
                        Connections weaker than this will be excluded.

    Returns
    -------
    networkx.DiGraph
                        NetworkX representation of the network.

    Examples
    --------
    >>> import pymaid
    >>> import matplotlib.pyplot as plt
    >>> import networkx as nx
    >>> import numpy as np
    >>> g = pymaid.network2nx('annotation:large network')
    >>> # Plot with default settings
    >>> nx.draw(g)
    >>> plt.show()
    >>> # Plot with neuron names
    >>> labels = nx.get_node_attributes(g, 'neuron_name')
    >>> nx.draw(g, labels=labels, with_labels=True)
    >>> plt.show()
    >>> # Plot with layout
    >>> layout = nx.circular_layout(g)
    >>> nx.draw(g, pos=layout)
    >>> plt.show()
    >>> # Plot with edge weights
    >>> nx.draw_networkx_nodes(g, pos=layout)
    >>> weight = np.array(list(nx.get_edge_attributes(g, 'weight').values()))
    >>> nx.draw_networkx_edges(g, pos=layout, width=weight / 50)
    >>> plt.show()

    """
    # Evaluate remote instance up front - we need it for fetching names in
    # either branch
    remote_instance = utils._eval_remote_instance(remote_instance)

    if isinstance(x, (core.CatmaidNeuronList, list, np.ndarray, str)):
        skids = utils.eval_skids(x, remote_instance=remote_instance)

        # Fetch edges
        edges = fetch.get_edges(skids, remote_instance=remote_instance)

        # Reformat into networkx format
        edges = [[str(e.source_skid), str(e.target_skid), {'weight': e.weight}]
                 for e in edges[edges.weight >= threshold].itertuples()]
    elif isinstance(x, pd.DataFrame):
        # We have to account for the fact that some might not be skids
        skids = []
        for s in list(set(x.columns.tolist() + x.index.tolist())):
            try:
                skids.append(int(s))
            except BaseException:
                pass
        # Generate edge list
        edges = [[str(s), str(t), {'weight': x.loc[s, t]}]
                 for s in x.index.values
                 for t in x.columns.values if x.loc[s, t] >= threshold]
    else:
        raise ValueError('Unable to process data of type "{0}"'.format(type(x)))

    # Generate node dictionary (names are keyed by str skeleton IDs)
    names = fetch.get_names(skids, remote_instance=remote_instance)
    nodes = [[str(s), {'neuron_name': names.get(str(s), str(s))}]
             for s in skids]

    # Generate graph and assign custom properties
    g = nx.DiGraph()
    g.add_nodes_from(nodes)
    g.add_edges_from(edges)

    return g
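# Usage sketch for ``network2nx`` plus some downstream analysis with standard
# NetworkX calls (annotation is hypothetical):
#
#   import networkx as nx
#   g = network2nx('annotation:large network', remote_instance=rm, threshold=3)
#   # Weighted in-degree, i.e. summed incoming synapse counts per neuron
#   in_deg = dict(g.in_degree(weight='weight'))
#   # Strongly connected components of the wiring diagram
#   comps = list(nx.strongly_connected_components(g))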