def get_gene_names(feature_ids, source, species):
    """Return the gene names for the given feature IDs.

    Queries the Resolwe knowledge base for features matching the given
    IDs, source and species, and collects each feature's ``name``.
    """
    resolwe = resdk.Resolwe()
    matched = resolwe.feature.filter(
        feature_id__in=feature_ids, source=source, species=species
    )
    names = []
    for feature in matched:
        names.append(feature.name)
    return names
def setUp(self):
    """Prepare a cached RNATables instance backed by the test collection."""
    # Fresh temporary directory used as the tables cache for this run.
    self.cache_dir = tempfile.mkdtemp()
    self.test_server_url = "https://app.genialis.com"
    self.test_collection_slug = "resdk-test-collection-tables"

    server = resdk.Resolwe(
        url=self.test_server_url, username="******", password="******"
    )
    self.res = server
    self.collection = server.collection.get(self.test_collection_slug)
    self.ct = RNATables(self.collection, cache_dir=self.cache_dir)
def setUp(self):
    """Log in as admin, create test data and allow running the aligner."""
    server = resdk.Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
    self.res = server

    # Fixtures used by the tests below.
    self.reads = self.upload_reads(server)
    self.genome = self.upload_genome(server)

    # Grant permission to run the HISAT2 alignment process.
    self.allow_run_process(server, 'alignment-hisat2')
    super(TestStart, self).setUp()
def setUp(self):
    """Upload the example FASTQ file and wait for processing to finish."""
    self.res = resdk.Resolwe(EMAIL, PASSW, URL)
    # Build the fixture path with os.path.join instead of string
    # concatenation; the old "dirname + './../files/…'" form produced a
    # bogus "dir./.." segment that only worked because normpath happened
    # to collapse it lexically. abspath() already normalizes the result.
    self.reads = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..', 'files',
                     'example.fastq'))
    self.basename = os.path.basename(self.reads)
    # Expected names of the files the upload process produces.
    self.fastq = self.basename + ".gz"
    self.fastqc_archive = self.basename.split('.')[0] + "_fastqc.zip"
    self.data = self.res.run(slug='upload-fastq-single',
                             input={'src': [self.reads]})
    wait_for_update(self.data, maxtime=20, step=2)
def main():
    """Invoke when run directly as a program."""
    args = parse_arguments()
    res = resdk.Resolwe()

    # One feature ID per line in the input file.
    with open(args.feature_ids) as gene_file:
        genes = [gene.strip() for gene in gene_file]

    # Verify the IDs exist in the knowledge base for the given source.
    org_features = res.feature.filter(source=args.source_db,
                                      feature_id=genes)
    if len(org_features) == 0:
        print(
            '{"proc.error":"No genes were fetched from the knowledge base."}')
        exit(1)

    # All input genes must belong to exactly one species.
    species = set(feature.species for feature in org_features)
    if len(species) != 1:
        print('{"proc.error":"Input genes belong to multiple species."}')
        exit(1)
    else:
        species = species.pop()

    if args.species == species and args.source_db == args.target_db:
        # IDs are already in the target namespace; no mapping needed.
        target_ids = genes
    else:
        features = res.mapping.filter(source_db=args.source_db,
                                      target_db=args.target_db,
                                      source_id=genes)
        if len(features) == 0:
            print('{"proc.error":"Failed to map features."}')
            exit(1)

        target_ids = [str(feature.target_id) for feature in features]
        if len(genes) > len(target_ids):
            print('{"proc.warning":"Not all features could be mapped."}')

    # Write the (mapped) IDs to a temp file and run the external
    # enrichment processor; its stdout carries the resulting JSON.
    with tempfile.NamedTemporaryFile() as input_genes:
        input_genes.write(' '.join(target_ids).encode("UTF-8"))
        input_genes.flush()
        process = Popen(['processor', str(args.pval), str(args.min_genes),
                         args.obo, args.gaf, input_genes.name],
                        stdout=PIPE, stderr=DEVNULL)
        out, err = process.communicate()

    with open('terms.json', 'w') as f:
        f.write(out.decode("UTF-8"))
def setUp(self):
    """Upload example reads and place the resulting data in a collection."""
    server = resdk.Resolwe(EMAIL, PASSW, URL)
    self.res = server
    self.reads = os.path.abspath(
        os.path.normpath(
            os.path.dirname(__file__) + "./../files/example.fastq"))
    self.data = server.run(slug='upload-fastq-single',
                           input={'src': [self.reads]})
    wait_for_update(self.data, maxtime=20, step=2)
    self.contributor = self.data.contributor

    # Make collection and add self.data to it.
    json_data = server.api.collection.post({u'name': 'testing_collection'})
    self.collection = Collection(model_data=json_data, resolwe=server)
def setUp(self):
    """Upload fixtures and grant the permissions the tests require."""
    server = resdk.Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
    self.res = server

    # Test fixtures: reads, genome, genome index and annotation.
    self.reads = self.upload_reads(server)
    self.genome = self.upload_genome(server)
    self.genome_index = self.create_genome_index(server, self.genome)
    self.annotation = self.upload_annotation(server)

    # Set permissions for running processes:
    for process_slug in ('upload-fastq-single', 'alignment-hisat2',
                         'workflow-bbduk-star-htseq'):
        self.allow_run_process(server, process_slug)

    # Set permissions for using descriptor_schemas:
    for schema_slug in ('reads', 'sample'):
        self.allow_use_descriptor_schema(server, schema_slug)

    super().setUp()
def setUp(self):
    """Upload example reads and fetch the auto-created sample."""
    server = resdk.Resolwe(EMAIL, PASSW, URL)
    self.res = server
    self.reads = os.path.abspath(
        os.path.normpath(
            os.path.dirname(__file__) + "./../files/example.fastq"))
    self.basename = os.path.basename(self.reads)
    # Names of the files the upload process is expected to produce.
    self.fastq = self.basename + ".gz"
    self.fastqc_archive = self.basename.split('.')[0] + "_fastqc.zip"
    self.data = server.run(slug='upload-fastq-single',
                           input={'src': [self.reads]})
    wait_for_update(self.data, maxtime=20, step=2)

    # Make a sample
    self.sample = server.presample.filter(data=self.data.id)[0]
    self.sample.confirm_is_annotated()
    # Pull the same sample down again to get it as Sample with sample endpoint:
    self.sample = server.sample.filter(data=self.data.id)[0]
def test_tutorial_resources(self):
    """Verify existence of resources required for the tutorial.

    Fetches each sample and data object by slug; ``get`` raises when a
    resource is missing, which fails this test.
    """
    res = resdk.Resolwe(url='https://app.genialis.com')

    sample_slugs = [
        BaseResdkDocsFunctionalTest.sample_slug,
    ]
    for sample_slug in sample_slugs:
        res.sample.get(sample_slug)

    data_slugs = [
        BaseResdkDocsFunctionalTest.reads_slug,
        BaseResdkDocsFunctionalTest.genome_slug,
        BaseResdkDocsFunctionalTest.annotation_slug,
        BaseResdkDocsFunctionalTest.genome_index_slug,
    ]
    for data_slug in data_slugs:
        res.data.get(data_slug)
def setUp(self):
    """Prepare fixture paths, a server connection and a test collection."""
    server = resdk.Resolwe(EMAIL, PASSW, URL)
    self.res = server

    base_dir = os.path.dirname(__file__)
    self.reads = os.path.abspath(
        os.path.normpath(base_dir + "./../files/example.fastq"))
    self.yaml_path = os.path.abspath(
        os.path.normpath(base_dir + "./../files/custom_process.yaml"))
    self.tool1_path = os.path.abspath(
        os.path.normpath(base_dir + "./../files/sum.py"))

    # Make a collection:
    collection_json = server.api.collection.post({u'name': 'test_collection'})
    self.collection = Collection(model_data=collection_json, resolwe=server)

    self.result = None
def validate_protein(self):
    """Only validate protein names if species is human or mouse.

    For each sample, extracts the gene symbol from the Protein column
    and, for human/mouse samples, verifies it against the knowledge
    base unless it is already whitelisted in ``PROTEIN``.
    """
    res = resdk.Resolwe(url="https://app.genialis.com")
    for sample_name in self.sample_names:
        species = self.get_element(column_name="Species",
                                   sample_name=sample_name)
        protein = self.get_element(column_name="Protein",
                                   sample_name=sample_name)
        gene_symbol = self.get_part_before_colon_hypen(protein)
        if gene_symbol and gene_symbol not in PROTEIN:
            # Fixed species literal: the garbled "H**o sapiens" could
            # never equal a real species value, so human samples were
            # silently skipped by this check.
            if species in ["Homo sapiens", "Mus musculus"]:
                kb_gene = res.feature.filter(source="UCSC",
                                             feature_id=[gene_symbol])
                if not kb_gene:
                    self.error(
                        "SAMPLE: {} - Gene symbol {} is either invalid or "
                        "Knowledge Base cannot be reached.".format(
                            sample_name, protein))
def main():
    """Run."""
    res = resdk.Resolwe(url=IMAPS_URL)
    res.login()

    problems = []
    for species in SPECIES:
        # Verify every reference asset registered for this species,
        # in the same fixed order as before.
        for asset_map in (GENOME, ANNOTATION, SEGMENT, STAR_INDEX,
                          TRNA_RRNA_SEQ, TRNA_RRNA_INDEX):
            check_asset(res, asset_map[species], problems)

    if problems:
        for err in problems:
            print(err)
        raise ValueError("See errors above.")

    print("All good, assets as expected.")
def main():
    """Invoke when run directly as a program."""
    args = parse_arguments()
    res = resdk.Resolwe(url=SERVER_URL)
    res.login()

    collection = res.collection.get(name=args.collection)
    types = parse_types(args.types)

    for data in collection.data:
        # Skip data objects that did not finish successfully.
        if data.status != "OK":
            continue

        for type_ in types:
            # type is a tuple of size 1 or 2: (field_name) or (field_name, process_type)
            if len(type_) == 2:
                # Only download when the data's process type matches.
                if not data.process.type.strip(":").endswith(type_[1]):
                    continue

            field_name = type_[0]
            if field_name not in data.output:
                continue

            if isinstance(data.output[field_name], list):
                for item in data.output[field_name]:
                    # Check if the name of the file to-be-downloaded will
                    # clash with existing filenames in the download
                    # directory. If so, rename the existing file to a
                    # non-existing name.
                    original_name = os.path.basename(item["file"])
                    rename_if_clashing(original_name, args.directory)
            else:
                original_name = os.path.basename(
                    data.output[field_name]["file"])
                rename_if_clashing(original_name, args.directory)

            print("Downloading {} output of data {} ...".format(
                field_name, data.name))
            data.download(field_name=field_name, download_dir=args.directory)
def run(self, inputs, outputs):
    """Run analysis."""
    basename = os.path.basename(inputs.slamdunk.tcount.path)
    assert basename.endswith(".tsv")
    name = basename[:-4]
    args = [
        "-o",
        ".",
        "-t",
        self.requirements.resources.cores,
    ]

    # Run `alleyoop collapse` on the tcount file; TEE streams output
    # while capturing the return code.
    return_code, _, _ = Cmd["alleyoop"]["collapse"][args][
        inputs.slamdunk.tcount.path] & TEE(retcode=None)
    if return_code:
        self.error("Alleyoop collapse analysis failed.")

    collapsed_output = name + "_collapsed.txt"
    # The tool writes a *_collapsed.csv file; keep it under a .txt name.
    os.rename(name + "_collapsed.csv", collapsed_output)

    # normalize to TPM
    tcount_tpm = compute_tpm(collapsed_output)

    # Map gene symbols to feature IDs
    res = resdk.Resolwe()
    CHUNK_SIZE = 1000
    feature_dict = {}
    out_columns = [
        "gene_symbol",
        "length",
        "readsCPM",
        "readsTPM",
        "conversionRate",
        "Tcontent",
        "coverageOnTs",
        "conversionsOnTs",
        "readCount",
        "tcReadCount",
        "multimapCount",
    ]
    input_features = tcount_tpm.index.tolist()
    # Query the knowledge base in chunks to keep request sizes bounded.
    features_sublists = [
        input_features[i:i + CHUNK_SIZE]
        for i in range(0, len(input_features), CHUNK_SIZE)
    ]
    for fsublist in features_sublists:
        features = res.feature.filter(
            source=inputs.source,
            species=inputs.slamdunk.species,
            feature_id__in=fsublist,
        )
        feature_dict.update({f.feature_id: f.name for f in features})

    # Unmapped feature IDs get NaN in the gene_symbol column.
    tcount_tpm["gene_symbol"] = tcount_tpm.index.map(feature_dict)
    tcount_tpm.to_csv(collapsed_output, columns=out_columns, sep="\t")

    outputs.tcount = collapsed_output
    outputs.species = inputs.slamdunk.species
    outputs.build = inputs.slamdunk.build
def setUp(self):
    """Log in as admin and upload the reads fixture."""
    server = resdk.Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
    self.res = server
    self.reads = self.upload_reads(server)
    super(TestTutorialGet, self).setUp()
import argparse  # was used below but never imported (NameError)
import csv
import os

import resdk
from resdk.resources import Collection

# Command-line interface for the upload script.
parser = argparse.ArgumentParser(description='Upload raw data.')
parser.add_argument('-sample_sheet', type=str, help='Sample sheet', required=True)
parser.add_argument('-username', type=str, help='Username', required=True)
parser.add_argument('-password', type=str, help='Password', required=True)
parser.add_argument('-URL', type=str, help='URL', required=True)
args = parser.parse_args()

res = resdk.Resolwe(args.username, args.password, args.URL)
resdk.start_logging()

# Parse the tab-separated sample sheet into
# {sample_name: {column_name: value}}.
samples = {}
# csv.reader needs a text-mode file on Python 3 (newline='' per the csv
# docs); the previous 'rb' mode would raise on the first row.
with open(args.sample_sheet, 'r', newline='') as sample_sheet:
    sample_reader = csv.reader(sample_sheet, delimiter='\t')
    header = next(sample_reader)
    for row in sample_reader:
        samples[row[0]] = {col: '' for col in header}
        for i, column in enumerate(row):
            if i == 0:
                continue  # skip sample name
            samples[row[0]][header[i]] = column
def main():
    """Invoke when run directly as a program."""
    args = parse_arguments()

    # Additional expression files and their declared types must line up
    # one-to-one.
    if args.norm_expressions and args.norm_expressions_type:
        if len(args.norm_expressions) != len(args.norm_expressions_type):
            print(
                error(
                    'The number of additional expression files must match the number of specified '
                    'expressions types.'))
            sys.exit(1)

    # The main type plus all additional types must be unique.
    if args.norm_expressions_type:
        exp_types = [args.expressions_type] + args.norm_expressions_type
        if len(exp_types) != len(set(exp_types)):
            print(
                error(
                    'The union of the main expression type ({}) and additional normalized expression types {} '
                    'does not contain unique items.'.format(
                        args.expressions_type, args.norm_expressions_type)))
            sys.exit(1)

    res = resdk.Resolwe()
    feature_dict = {}
    df = parse_expression_file(args.expressions, args.expressions_type)

    # Get a list of feature IDs
    input_features = df['FEATURE_ID'].tolist()

    # Split feature IDs into chunks with max size of 10000 elements
    features_sublists = [
        input_features[i:i + CHUNK_SIZE]
        for i in range(0, len(input_features), CHUNK_SIZE)
    ]

    # Fetch features from KB and add them to {feature_id: feature_name} mapping dict
    for fsublist in features_sublists:
        features = res.feature.filter(source=args.source_db,
                                      species=args.species,
                                      feature_id=fsublist)
        feature_dict.update({f.feature_id: f.name for f in features})

    # Map gene symbols to feature IDs
    df['GENE_SYMBOL'] = df['FEATURE_ID'].map(feature_dict)

    # Check if all of the input feature IDs could be mapped to the gene symbols
    if not all(f_id in feature_dict for f_id in input_features):
        print(
            warning(
                '{} feature(s) could not be mapped to the associated feature symbols.'
                .format(sum(df.isnull().values.ravel()))))

    # Merge additional expression files with the original data frame
    if args.norm_expressions and args.norm_expressions_type:
        for exp_file, exp_type in zip(args.norm_expressions,
                                      args.norm_expressions_type):
            exp_df = parse_expression_file(exp_file, exp_type)
            df = df.merge(exp_df, on='FEATURE_ID')

    # Reorder the columns in dataframe
    columns = ['FEATURE_ID', 'GENE_SYMBOL', args.expressions_type]
    if args.norm_expressions_type:
        columns = columns + args.norm_expressions_type
    df = df[columns]

    # Replace NaN values with empty string
    df.fillna('', inplace=True)

    # Write to file
    df.to_csv(args.output_name + '.txt.gz',
              header=True,
              index=False,
              sep='\t',
              compression='gzip')

    # Write to JSON
    df_dict = df.set_index('FEATURE_ID').to_dict(orient='index')
    with open(args.output_name + '.json', 'w') as f:
        json.dump({'genes': df_dict}, f, allow_nan=False)
def setUp(self):
    """Create a server connection and start a fresh cleanup list."""
    self.res = resdk.Resolwe(EMAIL, PASSW, URL)
    # Objects appended here are presumably cleaned up during teardown.
    self.remove = []
def main():
    """Invoke when run directly as a program."""
    args = parse_arguments()
    res = resdk.Resolwe()

    # One feature ID per line in the input file.
    with open(args.feature_ids) as gene_file:
        genes = [gene.strip() for gene in gene_file]

    # Verify the IDs exist in the knowledge base.
    org_features = res.feature.filter(source=args.source_db,
                                      species=args.species,
                                      feature_id=genes)
    if len(org_features) == 0:
        print(error("No genes were fetched from the knowledge base."))
        exit(1)

    if args.source_db == args.target_db:
        # IDs are already in the target namespace; no mapping needed.
        target_ids = genes
    else:
        mapping_res = res.mapping.filter(
            source_db=args.source_db,
            source_species=args.species,
            target_db=args.target_db,
            target_species=args.species,
            source_id=genes,
        )
        if len(mapping_res) == 0:
            print(error("Failed to map features."))
            exit(1)

        # Keep only the first mapping per source ID; warn on duplicates.
        mappings = {}
        for m in mapping_res:
            if m.source_id in genes:
                if m.source_id not in mappings:
                    mappings[m.source_id] = m.target_id
                else:
                    print(
                        warning(
                            "Mapping {} returned multiple times.".format(m)))

        if len(genes) > len(mappings):
            print(warning("Not all features could be mapped."))

        target_ids = mappings.values()

    # Write the (mapped) IDs to a temp file and run the external
    # enrichment processor; its stdout carries the resulting JSON.
    with tempfile.NamedTemporaryFile() as input_genes:
        input_genes.write(' '.join(target_ids).encode("UTF-8"))
        input_genes.flush()
        process = Popen(['processor', str(args.pval), str(args.min_genes),
                         args.obo, args.gaf, input_genes.name],
                        stdout=PIPE, stderr=DEVNULL)
        out, err = process.communicate()

    with open('terms.json', 'w') as f:
        f.write(out.decode("UTF-8"))
import resdk

# Connect to the Resolwe server.
res = resdk.Resolwe('admin', 'admin', 'https://torta.bcm.genialis.com')

# Log command details to stdout.
resdk.start_logging()

# Fetch the sample's meta-data from the server.
sample = res.sample.get('human-example-chr22')

# Download the files that belong to the sample.
sample.download()
"""Code for ``tutorial-get.rst`` file."""
import resdk

# Connect to the server and authenticate.
res = resdk.Resolwe(url='https://app.genialis.com')
res.login()

# Verbose logging to standard output.
resdk.start_logging()

# Browse the main endpoints.
res.data.all()
res.sample.all()

# Collections with "SDK" in their name.
res.collection.filter(name='SDK')

# Processes in the "Align" category.
res.process.filter(category='Align')

# Combine several filter fields at once:
res.data.filter(
    status='OK',
    created__gt='2018-10-01',
    created__lt='2025-11-01',
    ordering='-modified',
    limit=3,
)

# Fetch a single object by its slug.
res.sample.get('resdk-example')
import resdk

# Connect to the Resolwe server with your credentials.
res = resdk.Resolwe('<USERNAME>', '<PASSWORD>', 'https://app.genialis.com')

# Verbose logging to standard output.
resdk.start_logging()

# Fetch the sample's meta-data from the server.
sample = res.sample.get('mouse-example-chr19')

# Download the files that belong to the sample.
sample.download()