def main():
    """Print the ``run`` command that starts the RNA-seq workflow.

    Arguments come from ``parse_arguments()``.  Genome, annotation and
    (optional) adapters are resolved to data ids with ``get_data_id()``
    and the result is printed as ``run <compact JSON>``.
    """
    args = parse_arguments()

    inputs = {
        'reads': args.reads,
        'minlen': args.minlen,
        'trailing': args.trailing,
        'stranded': args.stranded,
        'id_attribute': args.id_attribute,
    }
    data = {'input': inputs}

    # Choose the workflow from the type of the reads object.  If neither
    # prefix matches, no 'process' key is emitted.
    workflows = (
        ('data:reads:fastq:single:', 'workflow-rnaseq-single'),
        ('data:reads:fastq:paired:', 'workflow-rnaseq-paired'),
    )
    for type_prefix, process_slug in workflows:
        if args.type.startswith(type_prefix):
            data['process'] = process_slug

    references = GENOMES_AND_ANNOTATIONS[args.genome_and_annotation]
    adapters = ADAPTERS[args.adapters]

    res = Resolwe()
    inputs['genome'] = get_data_id(res, references['genome'], 'data:genome:fasta:')
    inputs['annotation'] = get_data_id(res, references['annotation'], 'data:annotation:gtf:')
    if adapters:
        # Adapters are optional: a falsy ADAPTERS entry leaves the input out.
        inputs['adapters'] = get_data_id(res, adapters, 'data:seq:nucleotide:')

    payload = dumps(data, separators=(',', ':'))
    print('run {}'.format(payload))
def setUp(self):
    """Connect as admin, upload the test data and allow the aligner to run."""
    admin = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
    self.res = admin

    # Create data for tests:
    self.reads = self.upload_reads(admin)
    self.genome = self.upload_genome(admin)

    # Set permissions for running processes:
    self.allow_run_process(admin, 'alignment-hisat2')

    # The parent's setUp runs last, after the fixtures above exist.
    super().setUp()
def __init__(self, user, password, url):
    """Connect to a Resolwe server and translate resdk's errors.

    :param user: user name passed to ``Resolwe``
    :param password: password passed to ``Resolwe``
    :param url: server URL passed to ``Resolwe``
    :raises ResolweServerException: on an invalid URL, or when the error
        message says the server is not accessible
    :raises ResolweCredentialsException: when the error message says the
        credentials are invalid
    :raises ValueError: any other ``ValueError`` is re-raised unchanged
    """
    try:
        self._res = Resolwe(user, password, url)
    except requests.exceptions.InvalidURL as error:
        raise ResolweServerException(error)
    except ValueError as error:
        # TODO: is there a better way? resdk returns only ValueError
        # Until then the message text is the only way to tell failures apart.
        text = str(error)
        if text == 'Response HTTP status code 400. Invalid credentials?':
            raise ResolweCredentialsException(text)
        if text == 'Server not accessible on {}. Wrong url?'.format(url):
            raise ResolweServerException(text)
        raise
def setUp(self):
    """Connect as admin, create the test data and set up permissions."""
    admin = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
    self.res = admin

    # Test data; create_genome_index receives the uploaded genome.
    self.reads = self.upload_reads(admin)
    self.genome = self.upload_genome(admin)
    self.genome_index = self.create_genome_index(admin, self.genome)
    self.annotation = self.upload_annotation(admin)

    # Set permissions for running processes:
    for process_slug in (
        'upload-fastq-single',
        'alignment-hisat2',
        'workflow-bbduk-star-htseq',
    ):
        self.allow_run_process(admin, process_slug)

    # Set permissions for using descriptor_schemas:
    for schema_slug in ('reads', 'sample'):
        self.allow_use_descriptor_schema(admin, schema_slug)

    super().setUp()
def test_tutorial_resources(self):
    """Verify existence of resources required for tutorial."""
    res = Resolwe(url='https://app.genialis.com')
    docs = BaseResdkDocsFunctionalTest

    # Presumably ``get`` raises when nothing matches the slug, which is
    # what turns these bare look-ups into a check -- TODO confirm.
    sample_slugs = (docs.sample_slug,)
    for slug in sample_slugs:
        res.sample.get(slug)  # pylint: disable=no-member

    data_slugs = (
        docs.reads_slug,
        docs.genome_slug,
        docs.annotation_slug,
        docs.genome_index_slug,
    )
    for slug in data_slugs:
        # Only the 'id' field is requested for each data object.
        res.data.get(slug=slug, fields='id')  # pylint: disable=no-member
def test_tutorial_resources(self):
    """Verify existence of resources required for tutorial."""
    res = Resolwe(url="https://app.genialis.com")
    docs = BaseResdkDocsFunctionalTest

    # Presumably ``get`` raises when nothing matches the slug, which is
    # what turns these bare look-ups into a check -- TODO confirm.
    sample_slugs = (docs.sample_slug,)
    for slug in sample_slugs:
        res.sample.get(slug)

    data_slugs = (
        docs.reads_slug,
        docs.genome_slug,
        docs.annotation_slug,
        docs.genome_index_slug,
        docs.rrna_slug,
        docs.rrna_index_slug,
        docs.globin_slug,
        docs.globin_index_slug,
    )
    for slug in data_slugs:
        # Only the "id" field is requested for each data object.
        res.data.get(slug=slug, fields="id")
class BaseResdkFunctionalTest(unittest.TestCase):
    """Base class for functional tests in ReSDK.

    ``setUp`` creates two ``Resolwe`` connections to the server: one
    with the admin's credentials (``self.res``) and one with a normal
    user's credentials (``self.user_res``).

    The ``get_*`` helpers create data objects of basic types from dummy
    input files.
    """

    # NOTE(review): the organism literal 'H**o sapiens' in ``get_bams``
    # looks garbled -- confirm the intended value.

    def setUp(self):
        """Create the admin and the normal-user connection."""
        self.res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.user_res = Resolwe(USER_USERNAME, USER_PASSWORD, URL)

    def get_genome(self, collection=None):
        """Return genome data object.

        :param collection: If defined, the data object is added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_genome.fasta')
        return self.res.run('upload-genome', input={'src': src}, **extra)

    def get_gtf(self, collection=None):
        """Return GTF annotation data object.

        :param collection: If defined, the data object is added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        process_input = {
            'src': os.path.join(FILES_PATH, 'dummy_gtf.gtf'),
            'source': 'NCBI',
        }
        return self.res.run('upload-gtf', input=process_input, **extra)

    def get_reads(self, count=1, collection=None):
        """Return reads data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_reads.fastq')

        uploaded = []
        for _ in range(count):
            reads = self.res.run('upload-fastq-single', input={'src': src}, **extra)
            uploaded.append(reads)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(reads.sample)

        return uploaded

    def get_bams(self, count=1, collection=None):
        """Return bam data objects.

        The descriptor of every new object's sample gets its organism set.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_bam.bam')

        uploaded = []
        for _ in range(count):
            bam = self.res.run('upload-bam', input={'src': src}, **extra)
            organism_descriptor = {'sample': {'organism': 'H**o sapiens'}}
            bam.sample.update_descriptor(organism_descriptor)  # pylint: disable=no-member
            uploaded.append(bam)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(bam.sample)

        return uploaded

    def get_macs(self, count=1, collection=None):
        """Return macs data objects.

        The bam objects are created with ``get_bams`` and their samples
        are passed to ``macs`` with ``use_background=False``.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        samples = [bam.sample for bam in self.get_bams(count, collection)]
        return macs(samples, use_background=False)

    def get_cuffquants(self, count=1, collection=None):
        """Return cuffquant data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_cuffquant.cxb')

        uploaded = []
        for _ in range(count):
            process_input = {'src': src, 'source': 'NCBI'}
            cuffquant = self.res.run('upload-cxb', input=process_input, **extra)
            uploaded.append(cuffquant)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(cuffquant.sample)

        return uploaded
def setUp(self):
    """Connect as admin and upload the reads used by the tests."""
    admin = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
    self.res = admin
    self.reads = self.upload_reads(admin)

    # The parent's setUp runs last, after the fixtures above exist.
    super().setUp()
def upload_reads():
    """Upload NGS reads to the Resolwe server.

    Command-line entry point; arguments are parsed from ``sys.argv``.
    Either ``-r`` (single-end) or both ``-r1`` and ``-r2`` (paired-end,
    lists of equal length) must be given.

    All arguments and file paths are validated before the server is
    contacted.  On any validation error a message is printed and the
    process exits with status 1.
    """
    # ``exit()`` is a helper injected by the ``site`` module and is not
    # guaranteed to exist; ``sys.exit`` always is.
    import sys

    description = """Upload single-end or paired-end NGS reads to the Resolwe server.

UPLOAD A SINGLE-END FASTQ FILE:
resolwe-upload-reads -r sample1.fastq.gz

UPLOAD A SET OF MULTI-LANE FASTQ FILES:
resolwe-upload-reads -r sample1_lane1.fastq.gz sample1_lane2.fastq.gz

UPLOAD A PAIR OF PAIRED-END READS FILES:
resolwe-upload-reads -r1 sample1_mate1.fastq.gz -r2 sample1_mate2.fastq.gz

UPLOAD ALL SINGLE-END READS IN A WORKING DIRECTORY:
for reads_file in *.fastq.gz
do
   resolwe-upload-reads -r ${reads_file}
done

"""

    # RawTextHelpFormatter keeps the line breaks of the examples above.
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,
                                     description=description)

    # NOTE(review): the default address and the admin/admin credentials
    # are hard-coded -- confirm this is intended outside of development.
    parser.add_argument('-a', '--address', default='https://torta.bcm.genialis.com',
                        help='Resolwe server address')
    parser.add_argument('-u', '--username', default='admin', help='Username')
    parser.add_argument('-p', '--password', default='admin', help='User password')
    parser.add_argument('-c', '--collection', nargs='*', type=int, help='Collection ID(s)')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Enable verbose reporting')
    parser.add_argument('-r', metavar='READS-LANE-X', nargs='*',
                        help='Single-end reads (<read1_lane1 read1_lane2, ..>)')
    parser.add_argument('-r1', metavar='MATE-1-LANE-X', nargs='*',
                        help='Paired-end reads mate1 (<mate1_lane1 mate1_lane2, ..>)')
    parser.add_argument('-r2', metavar='MATE-2-LANE-X', nargs='*',
                        help='Paired-end reads mate2 (<mate2_lane1 mate2_lane2, ..>)')

    args = parser.parse_args()

    if args.verbose:
        resdk_logger.start_logging()

    # Exactly one of the two modes must be selected.
    single_end = bool(args.r)
    paired_end = bool(args.r1 and args.r2)
    mixed = single_end and bool(args.r1 or args.r2)
    if not (single_end or paired_end) or mixed:
        parser.print_help()
        print("\nERROR: define either -r or -r1 and -r2.\n")
        sys.exit(1)

    if not single_end and len(args.r1) != len(args.r2):
        parser.print_help()
        print("\nERROR: -r1 and -r2 file list length must match\n")
        sys.exit(1)

    if single_end:
        process_slug = 'upload-fastq-single'
        process_input = {'src': args.r}
        read_files = list(args.r)
    else:
        process_slug = 'upload-fastq-paired'
        process_input = {'src1': args.r1, 'src2': args.r2}
        read_files = list(args.r1) + list(args.r2)

    # Check the paths before connecting, so a typo fails fast and without
    # a needless login.
    if not all(os.path.isfile(path) for path in read_files):
        print("\nERROR: Incorrect file path(s).\n")
        sys.exit(1)

    resolwe = Resolwe(args.username, args.password, args.address)
    resolwe.run(process_slug, process_input, collections=args.collection)
# Tutorial script: connect to a Resolwe server, download sample outputs,
# inspect data objects and run an alignment process.
import resdk
from resdk import Resolwe

# NOTE(review): hard-coded admin/admin credentials -- fine for a tutorial
# server, confirm before reusing elsewhere.
res = Resolwe('admin', 'admin', 'https://torta.bcm.genialis.com')

# Recommended: start logging
resdk.start_logging()

# Download the outputs of type 'bam' of a single sample.
sample = res.sample.get(1)
sample.download(type='bam')

# Do the same for every sample matching a descriptor filter.
samples = res.sample.filter(descriptor__organism="H**o sapiens")
for sample in samples:
    sample.download(type='bam')

# Print the process name of every data object of a sample.
sample = res.sample.get(1)
for data_id in sample.data:
    data = res.data.get(data_id)
    print(data.process_name)

# Download one named file from the first ROSE2 data object.
rose2_list = res.data.filter(type='data:chipseq:rose2:')
rose2 = rose2_list[0]
rose2.download(name='20150531-u266-A-H3K27Ac-ML1949_S2_R1_mapped_peaks_Plot_panel.png')

# Run an alignment on the sample's first data object.
genome = res.data.get('hg19')
genome_id = genome.id
reads_id = sample.data[0]
aligned = res.run('alignment-bowtie-2-2-3_trim', input={
    'genome': genome_id,
    'reads': reads_id,
    'reporting': {'rep_mode': 'k', 'k_reports': 1},
})

# Bare expression: shows the status when run in an interactive session.
aligned.status
class BaseResdkFunctionalTest(unittest.TestCase):
    """Base class for functional tests in ReSDK.

    ``setUp`` creates two ``Resolwe`` connections to the server: one
    with the admin's credentials (``self.res``) and one with a normal
    user's credentials (``self.user_res``).

    The ``get_*`` helpers create data objects of basic types from dummy
    input files.
    """

    # NOTE(review): the default species literal 'H**o sapiens' looks
    # garbled -- confirm the intended value.

    def setUp(self):
        """Create the admin and the normal-user connection."""
        self.res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.user_res = Resolwe(USER_USERNAME, USER_PASSWORD, URL)

    def get_genome(self, collection=None, species='H**o sapiens', build='hg38'):
        """Return genome data object.

        :param collection: If defined, the data object is added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        process_input = {
            'src': os.path.join(FILES_PATH, 'dummy_genome.fasta'),
            'species': species,
            'build': build,
        }
        return self.res.run('upload-genome', input=process_input, **extra)

    def get_gtf(self, collection=None, species='H**o sapiens', build='hg38'):
        """Return GTF annotation data object.

        :param collection: If defined, the data object is added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        process_input = {
            'src': os.path.join(FILES_PATH, 'dummy_gtf.gtf'),
            'source': 'NCBI',
            'species': species,
            'build': build,
        }
        return self.res.run('upload-gtf', input=process_input, **extra)

    def get_reads(self, count=1, collection=None):
        """Return reads data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_reads.fastq')

        uploaded = []
        for _ in range(count):
            reads = self.res.run('upload-fastq-single', input={'src': src}, **extra)
            uploaded.append(reads)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(reads.sample)

        return uploaded

    def get_multiplexed(self, count=1, collection=None):
        """Return demultiplexed reads data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]

        file_names = {
            'reads': 'pool24.read1.small.qseq.bz2',
            'reads2': 'pool24.read3.small.qseq.bz2',
            'barcodes': 'pool24.read2.small.qseq.bz2',
            'annotation': 'pool24.tsv',
        }
        # Turn the bare file names into paths below FILES_PATH.
        process_input = {}
        for input_name, file_name in file_names.items():
            process_input[input_name] = os.path.join(FILES_PATH, file_name)

        uploaded = []
        for _ in range(count):
            uploaded.append(self.res.run('upload-multiplexed-paired', process_input, **extra))
        return uploaded

    def get_bams(self, count=1, collection=None, species='H**o sapiens', build='hg38'):
        """Return bam data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_bam.bam')

        uploaded = []
        for _ in range(count):
            process_input = {'src': src, 'species': species, 'build': build}
            bam = self.res.run('upload-bam', input=process_input, **extra)
            uploaded.append(bam)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(bam.sample)

        return uploaded

    def get_macs(self, count=1, collection=None):
        """Return macs data objects.

        The bam objects are created with ``get_bams`` and their samples
        are passed to ``macs`` with ``use_background=False``.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        samples = [bam.sample for bam in self.get_bams(count, collection)]
        return macs(samples, use_background=False)

    def get_cuffquants(self, count=1, collection=None, species='H**o sapiens', build='hg38'):
        """Return cuffquant data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_cuffquant.cxb')

        uploaded = []
        for _ in range(count):
            process_input = {
                'src': src,
                'source': 'NCBI',
                'species': species,
                'build': build,
            }
            cuffquant = self.res.run('upload-cxb', input=process_input, **extra)
            uploaded.append(cuffquant)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(cuffquant.sample)

        return uploaded

    def get_expression(self, count=1, collection=None, species='H**o sapiens', build='hg38'):
        """Return expression data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_expression.tab')

        uploaded = []
        for _ in range(count):
            process_input = {
                'exp': src,
                'exp_name': 'test_expression',
                'source': 'NCBI',
                'species': species,
                'build': build,
            }
            uploaded.append(self.res.run('upload-expression', input=process_input, **extra))
        return uploaded
def sequp():
    """Auto-upload NGS reads from directory to the Resolwe server.

    Script checks if there are new reads or annotation files in the
    target directory tree. If both: reads and corresponding annotation
    files are present, upload the reads and set the initial annotation
    based on the annotation file.

    We want to upload files which have not been uploaded yet. We need
    to know the most recent modification date of all uploaded files.
    Files modified later are upload candidates. The timestamp of last
    modification time is stored in config_file.

    Settings are taken from the command line first, then from the
    ``GENIALIS_URL``, ``GENIALIS_USERNAME``, ``GENIALIS_PASS`` and
    ``GENIALIS_SEQ_DIR`` environment variables, then from built-in
    defaults.  The process exits with status 1 when an annotation file
    cannot be parsed as CSV.
    """
    # ``exit()`` is a helper injected by the ``site`` module and is not
    # guaranteed to exist; ``sys.exit`` always is.
    import sys

    # XXX: Saving the config_file in user_data_dir is probably not the
    #      right decision. We want multiple users to be able to upload data
    #      to the same directory - therefore the config_file should be set
    #      for the system and not user dependant.

    # Application data
    config_file = os.path.join(appdirs.user_data_dir(about.__title__, about.__author__), 'config')

    # XXX: Increase to 1h
    change_time_window = 5

    # The 'U' flag was removed from open() in Python 3.11.  Text mode on
    # Python 3 already uses universal newlines; Python 2 still needs it.
    annotation_open_mode = 'rU' if sys.version_info[0] < 3 else 'r'

    parser = argparse.ArgumentParser(description='Auto-upload NGS reads from '
                                                 'directory to the Resolwe server.')

    parser.add_argument('-a', '--address', help='Resolwe server address')
    parser.add_argument('-u', '--username', help='Username')
    parser.add_argument('-p', '--password', help='User password')
    parser.add_argument('-d', '--directory', help='Observed directory with reads')
    parser.add_argument('-f', '--force', action='store_true', help='Force upload of all files')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose reporting')

    args = parser.parse_args()

    if args.verbose:
        resdk_logger.start_logging()

    # NOTE(review): the fallbacks are a local server with admin/admin
    # credentials -- confirm this is intended outside of development.
    genialis_url = args.address or os.getenv('GENIALIS_URL') or 'http://localhost:8000'
    genialis_username = args.username or os.getenv('GENIALIS_USERNAME') or 'admin'
    genialis_pass = args.password or os.getenv('GENIALIS_PASS') or 'admin'
    genialis_seq_dir = args.directory or os.getenv('GENIALIS_SEQ_DIR') or os.path.expanduser('~')
    genialis_seq_dir = os.path.normpath(genialis_seq_dir)

    logger.info('Address: {}'.format(genialis_url))
    logger.info('User: {}'.format(genialis_username))
    logger.info('Pass: ******')
    logger.info('Directory: {}'.format(genialis_seq_dir))

    # --force forgets all stored timestamps, so every file is a candidate.
    if args.force and os.path.isfile(config_file):
        os.remove(config_file)

    def read_timestamps():
        """Read timestamps from config_file.

        :rtype: Dict of pairs (dir, timestamp)
        """
        if not os.path.isfile(config_file):
            return {}

        data = {}
        with open(config_file, 'r') as file_:
            for line in file_:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                parts = line.split('\t')
                data[parts[0]] = float(parts[1])
        return data

    def write_timestamps(pairs):
        """Write timestamps to config_file."""
        with open(config_file, 'w') as file_:
            for first, second in pairs.items():
                file_.write(str(first) + '\t' + str(second) + '\n')

    def get_timestamp():
        """Get timestamp for GENIALIS_SEQ_DIR."""
        return read_timestamps().get(genialis_seq_dir, 0)

    def set_timestamp(timestamp):
        """Set timestamp for GENIALIS_SEQ_DIR."""
        # read_timestamps() returns {} when the file does not exist yet,
        # so updating and creating share one code path.
        pairs = read_timestamps()
        pairs[genialis_seq_dir] = timestamp

        config_dir = os.path.dirname(config_file)
        try:
            os.makedirs(config_dir)
        except OSError:
            # "Already exists" is fine; anything else (e.g. permissions)
            # must not be hidden.
            if not os.path.isdir(config_dir):
                raise
        write_timestamps(pairs)

    # Get timestamp
    timestamp = get_timestamp()

    # Find new reads
    all_new_read_files = []
    read_file_extensions = ['*.fastq', '*.fastq.gz', '*.fq', '*.fq.gz']
    for root, _, filenames in os.walk(genialis_seq_dir):
        for extension in read_file_extensions:
            for filename in fnmatch.filter(filenames, extension):
                path = os.path.join(root, filename)
                if os.path.getmtime(path) > timestamp:
                    all_new_read_files.append(path)

    # Determine if the candidate files are fully uploaded by the
    # sequencer. The idea is that the file size does not change in a
    # defined time window (change_time_window).
    sizes1 = {f: os.path.getsize(f) for f in all_new_read_files}
    if all_new_read_files:
        # Nothing to compare when there are no candidates, so do not wait.
        time.sleep(change_time_window)
    sizes2 = {f: os.path.getsize(f) for f in all_new_read_files}
    # Built once as a set: it is only used for subset tests below.
    all_new_read_files_uploaded = {
        os.path.normpath(f) for f in all_new_read_files if sizes1[f] == sizes2[f]
    }

    # Find all annotation files
    all_annotation_files = []
    annotation_file_extensions = ['*.csv', '*.txt', '*.tsv']
    for root, _, filenames in os.walk(genialis_seq_dir):
        for extension in annotation_file_extensions:
            for filename in fnmatch.filter(filenames, extension):
                all_annotation_files.append(os.path.join(root, filename))

    def parse_annotation_file(annotation_file):
        """Parse annotation file to a dict of rows keyed by SAMPLE_NAME.

        Only rows whose FASTQ_PATH files all exist are kept; their
        FASTQ_PATH is rewritten to normalized paths below the observed
        directory.
        """
        anns = {}
        # Universal newlines are needed to read also files with '\r' chars
        with open(annotation_file, annotation_open_mode) as file_:
            try:
                reader = csv.DictReader([row for row in file_ if row[0] != '#'],
                                        delimiter=str('\t'))
                # One line is one annotation (one reads file)
                for row in reader:
                    # Capitalize dict keys
                    row.update({k.upper(): v for k, v in row.items()})

                    # A missing, empty or None FASTQ_PATH cannot match any
                    # file, so the row is skipped.
                    if not row.get('FASTQ_PATH'):
                        continue

                    # Row-local list: paths of one row must never leak
                    # into the check of the next one.
                    seq_paths = [
                        os.path.normpath(os.path.join(genialis_seq_dir, seqfile))
                        for seqfile in row['FASTQ_PATH'].split(',')
                    ]
                    if all(os.path.isfile(sf) for sf in seq_paths):
                        row['FASTQ_PATH'] = ','.join(seq_paths)
                        anns[row['SAMPLE_NAME']] = row
            except csv.Error:
                logger.error("File type not supported")
                sys.exit(1)

        return anns

    # Write all annotations to single dict with sample names as keys
    annotations = {}
    for ann_file in all_annotation_files:
        annotations.update(parse_annotation_file(ann_file))

    # Connect to Resolwe server
    resolwe = Resolwe(genialis_username, genialis_pass, genialis_url)

    read_schemas = resolwe.api.descriptorschema.get(slug='reads')
    read_schema = read_schemas[0] if read_schemas else None

    # Upload all files in all_new_read_files_uploaded with annotations
    uploaded_files = []
    for sample_n, annotation in annotations.items():
        fw_reads = annotation['FASTQ_PATH'].split(',')
        if not set(fw_reads).issubset(all_new_read_files_uploaded):
            # Some of the sample's files are not new or still changing.
            continue

        descriptor, descriptor_schema = None, None
        if read_schema:
            descriptor_schema = read_schema['slug']
            barcode_removed = annotation.get('BARCODE_REMOVED', 'N').strip().upper()
            exp_type = EXPERIMENT_TYPE.get(annotation['SEQ_TYPE'].upper(), '')
            descriptor = {
                'reads_info': {
                    'barcode': annotation.get('BARCODE', None),
                    'barcode_removed': barcode_removed == 'Y',
                    'instrument_type': annotation.get('INSTRUMENT', None),
                    'seq_date': annotation.get('SEQ_DATE', None),
                }
            }
            if exp_type:
                descriptor['experiment_type'] = exp_type

        input_ = {}
        if annotation['PAIRED_END'] == 'Y' and annotation['FASTQ_PATH_PAIR']:
            # Paired-end reads
            rw_reads = annotation['FASTQ_PATH_PAIR'].split(',')
            slug = 'upload-fastq-paired'
            input_['src1'] = fw_reads
            input_['src2'] = [os.path.join(genialis_seq_dir, f) for f in rw_reads]
            file_path = input_['src1'] + input_['src2']
        else:
            # Single-end reads
            slug = 'upload-fastq-single'
            input_['src'] = fw_reads
            file_path = input_['src']

        data = resolwe.run(slug, input=input_, descriptor=descriptor,
                           descriptor_schema=descriptor_schema, data_name=sample_n)

        if not data:
            logger.error("Error uploading {}".format(sample_n))
            continue

        uploaded_files.extend(file_path)

        sample = data.sample
        if 'sample' not in sample.descriptor:
            sample.descriptor['sample'] = {}

        organism = ORGANISMS.get(annotation['ORGANISM'].upper(), '')
        if organism:
            sample.descriptor['sample']['organism'] = organism

        sample.update_descriptor(sample.descriptor)

    # Set the modification timestamp to the newest uploaded file
    modif_times = [os.path.getmtime(f) for f in uploaded_files]
    if modif_times:
        set_timestamp(max(modif_times))
def setUp(self):
    """Create the admin (``self.res``) and user (``self.user_res``) connections."""
    admin_connection = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
    self.res = admin_connection

    user_connection = Resolwe(USER_USERNAME, USER_PASSWORD, URL)
    self.user_res = user_connection
class BaseResdkFunctionalTest(unittest.TestCase):
    """Base class for functional tests in ReSDK.

    ``setUp`` creates two ``Resolwe`` connections to the server: one
    with the admin's credentials (``self.res``) and one with a normal
    user's credentials (``self.user_res``).

    The ``get_*`` helpers create data objects of basic types from dummy
    input files; the remaining helpers set slugs and public permissions.
    """

    # NOTE(review): the default species literal 'H**o sapiens' looks
    # garbled -- confirm the intended value.

    def setUp(self):
        """Create the admin and the normal-user connection."""
        self.res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.user_res = Resolwe(USER_USERNAME, USER_PASSWORD, URL)

    def get_genome(self, collection=None, species='H**o sapiens', build='hg38'):
        """Return genome data object.

        :param collection: If defined, the data object is added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        process_input = {
            'src': os.path.join(FILES_PATH, 'dummy_genome.fasta'),
            'species': species,
            'build': build,
        }
        return self.res.run('upload-genome', input=process_input, **extra)

    def get_gtf(self, collection=None, species='H**o sapiens', build='hg38'):
        """Return GTF annotation data object.

        :param collection: If defined, the data object is added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        process_input = {
            'src': os.path.join(FILES_PATH, 'dummy_gtf.gtf'),
            'source': 'NCBI',
            'species': species,
            'build': build,
        }
        return self.res.run('upload-gtf', input=process_input, **extra)

    def get_reads(self, count=1, collection=None):
        """Return reads data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_reads.fastq')

        uploaded = []
        for _ in range(count):
            reads = self.res.run('upload-fastq-single', input={'src': src}, **extra)
            uploaded.append(reads)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(reads.sample)

        return uploaded

    def get_bams(self, count=1, collection=None, species='H**o sapiens', build='hg38'):
        """Return bam data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_bam.bam')

        uploaded = []
        for _ in range(count):
            process_input = {'src': src, 'species': species, 'build': build}
            bam = self.res.run('upload-bam', input=process_input, **extra)
            uploaded.append(bam)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(bam.sample)

        return uploaded

    def get_macs(self, count=1, collection=None):
        """Return macs data objects.

        The bam objects are created with ``get_bams`` and their samples
        are passed to ``macs`` with ``use_background=False``.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        samples = [bam.sample for bam in self.get_bams(count, collection)]
        return macs(samples, use_background=False)

    def get_cuffquants(self, count=1, collection=None, species='H**o sapiens', build='hg38'):
        """Return cuffquant data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_cuffquant.cxb')

        uploaded = []
        for _ in range(count):
            process_input = {
                'src': src,
                'source': 'NCBI',
                'species': species,
                'build': build,
            }
            cuffquant = self.res.run('upload-cxb', input=process_input, **extra)
            uploaded.append(cuffquant)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(cuffquant.sample)

        return uploaded

    def get_expression(self, count=1, collection=None, species='H**o sapiens', build='hg38'):
        """Return expression data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_expression.tab')

        uploaded = []
        for _ in range(count):
            process_input = {
                'exp': src,
                'exp_name': 'test_expression',
                'source': 'NCBI',
                'species': species,
                'build': build,
            }
            uploaded.append(self.res.run('upload-expression', input=process_input, **extra))
        return uploaded

    def set_slug(self, resource, slug):
        """Assign ``slug`` to ``resource`` and save the resource."""
        setattr(resource, 'slug', slug)
        resource.save()

    def make_public(self, resource, permissions=None):
        """Add public permissions to ``resource``.

        :param permissions: permissions to add; ``['view']`` when None
        """
        granted = ['view'] if permissions is None else permissions
        resource.permissions.add_public(granted)

    def set_slug_and_make_public(self, resource, slug, permissions=None):
        """Run ``set_slug`` and then ``make_public`` on ``resource``."""
        self.set_slug(resource, slug)
        self.make_public(resource, permissions=permissions)
class BaseResdkFunctionalTest(unittest.TestCase):
    """Base class for functional tests in ReSDK.

    ``setUp`` creates two ``Resolwe`` connections to the server: one
    with the admin's credentials (``self.res``) and one with a normal
    user's credentials (``self.user_res``).

    The ``get_*`` helpers create data objects of basic types from dummy
    input files; the remaining helpers set slugs and public permissions.
    """

    # NOTE(review): the default species literal 'H**o sapiens' looks
    # garbled -- confirm the intended value.

    def setUp(self):
        """Create the admin and the normal-user connection."""
        self.res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.user_res = Resolwe(USER_USERNAME, USER_PASSWORD, URL)

    def get_genome(self, collection=None, species='H**o sapiens', build='hg38'):
        """Return genome data object.

        :param collection: If defined, the data object is added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        process_input = {
            'src': os.path.join(FILES_PATH, 'dummy_genome.fasta'),
            'species': species,
            'build': build,
        }
        return self.res.run('upload-genome', input=process_input, **extra)

    def get_gtf(self, collection=None, species='H**o sapiens', build='hg38'):
        """Return GTF annotation data object.

        :param collection: If defined, the data object is added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        process_input = {
            'src': os.path.join(FILES_PATH, 'dummy_gtf.gtf'),
            'source': 'NCBI',
            'species': species,
            'build': build,
        }
        return self.res.run('upload-gtf', input=process_input, **extra)

    def get_reads(self, count=1, collection=None):
        """Return reads data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_reads.fastq')

        uploaded = []
        for _ in range(count):
            reads = self.res.run('upload-fastq-single', input={'src': src}, **extra)
            uploaded.append(reads)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(reads.sample)

        return uploaded

    def get_bams(self, count=1, collection=None, species='H**o sapiens', build='hg38'):
        """Return bam data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_bam.bam')

        uploaded = []
        for _ in range(count):
            process_input = {'src': src, 'species': species, 'build': build}
            bam = self.res.run('upload-bam', input=process_input, **extra)
            uploaded.append(bam)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(bam.sample)

        return uploaded

    def get_macs(self, count=1, collection=None):
        """Return macs data objects.

        The bam objects are created with ``get_bams`` and their samples
        are passed to ``macs`` with ``use_background=False``.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        samples = [bam.sample for bam in self.get_bams(count, collection)]
        return macs(samples, use_background=False)

    def get_cuffquants(self, count=1, collection=None, species='H**o sapiens', build='hg38'):
        """Return cuffquant data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_cuffquant.cxb')

        uploaded = []
        for _ in range(count):
            process_input = {
                'src': src,
                'source': 'NCBI',
                'species': species,
                'build': build,
            }
            cuffquant = self.res.run('upload-cxb', input=process_input, **extra)
            uploaded.append(cuffquant)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(cuffquant.sample)

        return uploaded

    def get_expression(self, count=1, collection=None, species='H**o sapiens', build='hg38'):
        """Return expression data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects are added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value of the process's ``species`` input
        :param build: value of the process's ``build`` input
        """
        extra = {}
        if collection:
            extra['collections'] = [collection]
        src = os.path.join(FILES_PATH, 'dummy_expression.tab')

        uploaded = []
        for _ in range(count):
            process_input = {
                'exp': src,
                'exp_name': 'test_expression',
                'source': 'NCBI',
                'species': species,
                'build': build,
            }
            uploaded.append(self.res.run('upload-expression', input=process_input, **extra))
        return uploaded

    def set_slug(self, resource, slug):
        """Assign ``slug`` to ``resource`` and save the resource."""
        setattr(resource, 'slug', slug)
        resource.save()

    def make_public(self, resource, permissions=None):
        """Add public permissions to ``resource``.

        :param permissions: permissions to add; ``['view']`` when None
        """
        granted = ['view'] if permissions is None else permissions
        resource.permissions.add_public(granted)

    def set_slug_and_make_public(self, resource, slug, permissions=None):
        """Run ``set_slug`` and then ``make_public`` on ``resource``."""
        self.set_slug(resource, slug)
        self.make_public(resource, permissions=permissions)
def __init__(self, username=None, password=None, url=DEFAULT_URL):
    """Create the wrapped ``Resolwe`` connection.

    :param username: user name passed to ``Resolwe`` (default None)
    :param password: password passed to ``Resolwe`` (default None)
    :param url: server URL passed to ``Resolwe`` (default ``DEFAULT_URL``)
    """
    connection = Resolwe(username, password, url)
    # Kept private; the rest of the class talks to the server through it.
    self._res = connection
data = Table(os.path.join(URL_REMOTE, filename)) if '.tab.gz' in filename: filename = filename.replace('.tab.gz', '.pickle') data.save(filename) dataset = res.run('data-table-upload', input={'src': filename}) # dataset = res.data.get(id=1) annotations['tabular']['file_name'] = filename annotations['tabular']['file_size'] = os.stat(filename).st_size # descriptor schema slug dataset.descriptor_schema = 'data_info' dataset.descriptor = annotations dataset.save() # cleanup os.remove(filename) if __name__ == '__main__': res = Resolwe('admin', 'admin123', 'http://127.0.0.1:8000/') # upload('aml-1k.pickle') with ThreadPoolExecutor() as executor: futures = [executor.submit(upload, sc_file[0]) for sc_file in SC_FILES]