from archimedes.functions.dataflow import input_var, input_json, output_json, parseModelAttr, buildTargetPath
from archimedes.functions.magma import question, connect
from archimedes.functions.list import unique, flatten
from archimedes.functions.environment import project_name

# Configuration for the active project, and the label -> '<model>#<attribute>'
# map of things the user may select by.
pdat = input_json("project_data")[project_name]
selection_options = pdat['selection_options']

# Every query starts from the records of the sequencing model that have the
# counts-data attribute set.
seq_target = parseModelAttr(pdat['seq_h5_counts_data'])
q_start = [seq_target['model'], ['::has', seq_target['attribute']], '::all']

magma = connect()

# For each selection label, query its target attribute across those records,
# then pass the answer through flatten() and unique().
options = {}
for key in selection_options:
    target_path = buildTargetPath(selection_options[key], pdat)
    answer = question(magma, q_start + target_path)
    options[key] = unique(flatten(answer))

output_json(options, 'selection_options')
# then follow the link to 'biospecimen' (1:many; but note this link type-info is not actually needed!), # = ['biospecimen_group', 'biospecimen'] # then from there to travel up to that model's parent, 'subject'. # = ['biospecimen_group', 'biospecimen', 'subject'] 'experiment': ['sample', 'patient', 'experiment'], 'patient': ['sample', 'patient'], 'sample': ['sample'], 'sc_rna_seq': [] }, 'color_options': { # Format = <Label for the color-by drop down>: '<model>#<attribute>' # Cluster, Tube, and Gene are standard options that do not need to be added here! 'Indication': 'experiment#name', 'Tissue': 'sample#tissue_type', 'Compartment': 'sc_rna_seq#biospecimen', 'Chemistry': 'sc_rna_seq#chemistry', 'Frozen tissue?': 'patient#ffpe_frozen' # age, sex at birth, age at diag, bmi, smoker status, alcohol use, race, ethnicity, *past medical history (up to 10 per), time on ice }, 'selection_options': { # Format = <Label of this selection item>: '<model>#<attribute>' 'Indication': 'experiment#name', 'Tissue': 'sample#tissue_type', 'Compartment': 'sc_rna_seq#biospecimen', 'Chemistry': 'sc_rna_seq#chemistry' } } } output_json(project_data, "project_data")
selected = input_json('selected_options')
magma = connect()

# One filter per 'select-by' that has at least one chosen value: the value of
# that attribute must be among the options selected in the previous step.
filters = [
    buildTargetPath(selection_atts[target], pdat) + ['::in', selected[target]]
    for target in selection_atts
    if len(selected[target]) > 0
]

# Query the identifiers of all records that have counts data and pass every
# filter built above.
seq_target = parseModelAttr(pdat['seq_h5_counts_data'])
tube_query = [seq_target['model'], ['::has', seq_target['attribute']]]
tube_query.extend(filters)
tube_query.extend(['::all', '::identifier'])
tube_records = unique(question(magma, tube_query))

# Stop here rather than hand an empty record list to later steps.
if len(tube_records) == 0:
    raise RuntimeError('No records with data meet the selected criteria.')

output_json(tube_records, 'tube_recs')
from archimedes.functions.dataflow import input_path, output_json, input_json
from archimedes.functions.scanpy import scanpy as sc
from archimedes.functions.environment import project_name

scdata = sc.read(input_path('leiden_anndata.h5ad'))
pdat = input_json("project_data")[project_name]

# Standard color-by choices. 'Gene' maps every name in scdata.raw.var_names to
# None; the other standard choices carry no sub-options.
color_options = {
    'Gene': {gene_id: None for gene_id in scdata.raw.var_names},
    'Cluster': None,
    'Manual Annotations': None,
    'Tube': None,
}

# Add the project-specific labels, each with no sub-options.
for label in pdat['color_options']:
    color_options[label] = None

output_json(color_options, 'color_options')
from archimedes.functions.dataflow import output_json, input_var, input_json, parseModelAttr, buildTargetPath
from archimedes.functions.magma import connect, question
from archimedes.functions.environment import token, magma_host, project_name

input_records = input_json('record_ids')
pdat = input_json("project_data")[project_name]
seq_target = parseModelAttr(pdat['seq_h5_counts_data'])

magma = connect()

# Restrict the query to the requested record identifiers and ask for the URL
# of the counts-data attribute on each of them.
record_filter = ['::identifier', '::in', input_records]
h5_query = [
    seq_target['model'],
    record_filter,
    '::all',
    seq_target['attribute'],
    '::url',
]

# strip_identifiers=False is passed through unchanged from the original call.
h5_locations = question(magma, h5_query, strip_identifiers=False)

output_json(h5_locations, 'h5_locations')
from archimedes.functions.dataflow import output_path, input_path, input_var, input_bool, output_json
from archimedes.functions.scanpy import scanpy as sc

scdata = sc.read(input_path('umap_anndata.h5ad'))
use_weights = input_bool('use_weights')

# Calculate leiden clustering; results are stored under the "leiden" key.
leiden_resolution = float(input_var('leiden_resolution'))
sc.tl.leiden(
    scdata,
    resolution=leiden_resolution,
    key_added="leiden",
    use_weights=use_weights,
)

### Output
scdata.write(output_path('leiden_anndata.h5ad'))

clusts = scdata.obs['leiden'].tolist()
output_json(clusts, 'leiden.json')

# Placeholder annotations: every cluster number from 0 up to the highest
# label present maps to itself, as a string.
highest_cluster = max([int(label) for label in clusts])
blank_annots = {}
for cluster_number in range(highest_cluster + 1):
    blank_annots[str(cluster_number)] = str(cluster_number)
output_json(blank_annots, 'blank_annots.json')
) DF['cluster'] = cluster return DF pd.concat( (DF_per_cluster(DEdat, cluster)) for cluster in unique(scdata.obs['leiden']) ).to_csv(output_path('diffexp.csv')) # Extract top 10 markers per cluster, ignoring given certain prefixes names = DEdat['names'] out_len = 10 filters = input_var('ignore_prefixes').lower() if len(filters) > 0: def trim_prefix(list, prefix): for item in list[:]: if item.lower().startswith(prefix): list.remove(item) return list def trim_prefixes(list, prefixes): for prefix in prefixes: list = trim_prefix(list, prefix) return list top10 = dict( [ clust, trim_prefixes(list(names[clust]), filters.split(","))[:(out_len-1)] ] for clust in unique(scdata.obs['leiden']) ) else: top10 = pd.DataFrame(names).head(10).to_dict('list') output_json(top10, 'top10.json')