Beispiel #1
0
def main():
    """Assemble the RNA-seq workflow request and print it as a ``run`` line.

    The command-line arguments supply the reads and trimming/counting
    options; the genome, annotation and (optional) adapters are resolved
    to data ids through ``get_data_id``. The resulting dictionary is
    printed as compact JSON prefixed with ``run ``.
    """
    args = parse_arguments()

    inputs = {
        'reads': args.reads,
        'minlen': args.minlen,
        'trailing': args.trailing,
        'stranded': args.stranded,
        'id_attribute': args.id_attribute
    }
    data = {'input': inputs}

    # Select the workflow that matches the type of the reads object.
    workflow_by_type = (
        ('data:reads:fastq:single:', 'workflow-rnaseq-single'),
        ('data:reads:fastq:paired:', 'workflow-rnaseq-paired'),
    )
    for type_prefix, process_slug in workflow_by_type:
        if args.type.startswith(type_prefix):
            data['process'] = process_slug

    gna = GENOMES_AND_ANNOTATIONS[args.genome_and_annotation]
    ada = ADAPTERS[args.adapters]
    res = Resolwe()

    inputs['genome'] = get_data_id(res, gna['genome'], 'data:genome:fasta:')
    inputs['annotation'] = get_data_id(
        res, gna['annotation'], 'data:annotation:gtf:')
    # Adapters are optional: the key is only added when one was selected.
    if ada:
        inputs['adapters'] = get_data_id(res, ada, 'data:seq:nucleotide:')

    payload = dumps(data, separators=(',', ':'))
    print('run {}'.format(payload))
Beispiel #2
0
    def setUp(self):
        """Connect as admin, upload the test inputs and allow the process.

        Stores the connection in ``self.res`` and the uploaded objects in
        ``self.reads`` and ``self.genome`` before delegating to the parent
        ``setUp``.
        """
        admin_res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.res = admin_res

        # Input data used by the tests.
        self.reads = self.upload_reads(admin_res)
        self.genome = self.upload_genome(admin_res)

        # The tested process must be runnable.
        self.allow_run_process(admin_res, 'alignment-hisat2')
        super().setUp()
 def __init__(self, user, password, url):
     """Open a ``Resolwe`` connection and translate its start-up errors.

     :param user: username passed to ``Resolwe``
     :param password: password passed to ``Resolwe``
     :param url: server address passed to ``Resolwe``
     :raises ResolweServerException: on an invalid URL or when the
         "server not accessible" message is reported
     :raises ResolweCredentialsException: when the "invalid credentials"
         message is reported
     :raises ValueError: any other ``ValueError`` is re-raised unchanged
     """
     try:
         self._res = Resolwe(user, password, url)
     except requests.exceptions.InvalidURL as e:
         raise ResolweServerException(e)
     except ValueError as e:
         # TODO: is there a better way? resdk returns only ValueError,
         # so the failure kind has to be recognised from its message.
         msg = str(e)
         if msg == 'Response HTTP status code 400. Invalid credentials?':
             raise ResolweCredentialsException(msg)
         if msg == 'Server not accessible on {}. Wrong url?'.format(url):
             raise ResolweServerException(msg)
         raise
Beispiel #4
0
    def setUp(self):
        """Connect as admin, create the test data and grant permissions.

        Uploads reads, genome and annotation, builds a genome index, then
        allows the processes and descriptor schemas the tests rely on
        before delegating to the parent ``setUp``.
        """
        admin_res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.res = admin_res

        self.reads = self.upload_reads(admin_res)
        self.genome = self.upload_genome(admin_res)
        self.genome_index = self.create_genome_index(admin_res, self.genome)
        self.annotation = self.upload_annotation(admin_res)

        # Processes that must be runnable.
        runnable_processes = (
            'upload-fastq-single',
            'alignment-hisat2',
            'workflow-bbduk-star-htseq',
        )
        for process_slug in runnable_processes:
            self.allow_run_process(admin_res, process_slug)

        # Descriptor schemas that must be usable.
        for schema_slug in ('reads', 'sample'):
            self.allow_use_descriptor_schema(admin_res, schema_slug)

        super().setUp()
Beispiel #5
0
    def test_tutorial_resources(self):
        """Verify existence of resources required for tutorial.

        Every sample and data slug declared on
        ``BaseResdkDocsFunctionalTest`` is fetched from the public
        server; the test passes when all lookups complete without
        raising.
        """
        res = Resolwe(url='https://app.genialis.com')
        docs = BaseResdkDocsFunctionalTest

        for sample_slug in (docs.sample_slug,):
            res.sample.get(sample_slug)  # pylint: disable=no-member

        required_data = (
            docs.reads_slug,
            docs.genome_slug,
            docs.annotation_slug,
            docs.genome_index_slug,
        )
        for data_slug in required_data:
            # Only the id is requested for each data object.
            res.data.get(slug=data_slug, fields='id')  # pylint: disable=no-member
Beispiel #6
0
    def test_tutorial_resources(self):
        """Check that every resource the tutorial needs can be fetched.

        The sample and data slugs come from
        ``BaseResdkDocsFunctionalTest``; the test passes when all
        lookups against the public server complete without raising.
        """
        res = Resolwe(url="https://app.genialis.com")
        docs = BaseResdkDocsFunctionalTest

        for sample_slug in (docs.sample_slug,):
            res.sample.get(sample_slug)

        required_data = (
            docs.reads_slug,
            docs.genome_slug,
            docs.annotation_slug,
            docs.genome_index_slug,
            docs.rrna_slug,
            docs.rrna_index_slug,
            docs.globin_slug,
            docs.globin_index_slug,
        )
        for data_slug in required_data:
            # Only the id is requested for each data object.
            res.data.get(slug=data_slug, fields="id")
Beispiel #7
0
class BaseResdkFunctionalTest(unittest.TestCase):
    """Base class for functional tests in ReSDK.

    It creates two ``Resolwe`` connections to the server: one with
    admin's credentials (``self.res``) and one with normal user's
    credentials (``self.user_res``).

    It also includes utility functions to generate data objects of basic
    types with dummy input files.

    """

    def setUp(self):
        """Open the admin and the normal-user connection."""
        self.res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.user_res = Resolwe(USER_USERNAME, USER_PASSWORD, URL)

    @staticmethod
    def _collection_kwargs(collection):
        """Return the ``collections`` keyword arguments for ``run``.

        An empty dict is returned for a falsy ``collection`` so that the
        keyword is omitted from the call altogether.
        """
        return {'collections': [collection]} if collection else {}

    def get_genome(self, collection=None):
        """Return genome data object.

        :param collection: If defined, the data object will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        genome_path = os.path.join(FILES_PATH, 'dummy_genome.fasta')

        return self.res.run('upload-genome',
                            input={'src': genome_path},
                            **self._collection_kwargs(collection))

    def get_gtf(self, collection=None):
        """Return gtf data object.

        :param collection: If defined, the data object will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        gtf_path = os.path.join(FILES_PATH, 'dummy_gtf.gtf')

        return self.res.run('upload-gtf',
                            input={
                                'src': gtf_path,
                                'source': 'NCBI'
                            },
                            **self._collection_kwargs(collection))

    def get_reads(self, count=1, collection=None):
        """Return reads data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        reads_path = os.path.join(FILES_PATH, 'dummy_reads.fastq')
        collections = self._collection_kwargs(collection)

        reads = []
        for _ in range(count):
            read = self.res.run('upload-fastq-single',
                                input={'src': reads_path},
                                **collections)
            reads.append(read)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            # NOTE(review): ``add_samples`` is called on ``collection``
            #       itself, so a plain int id would fail here.
            if collection:
                collection.add_samples(read.sample)

        return reads

    def get_bams(self, count=1, collection=None):
        """Return bam data objects.

        The descriptor of each new sample gets its organism set to
        ``Homo sapiens``.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        bam_path = os.path.join(FILES_PATH, 'dummy_bam.bam')
        collections = self._collection_kwargs(collection)

        bams = []
        for _ in range(count):
            bam = self.res.run('upload-bam',
                               input={'src': bam_path},
                               **collections)
            bam.sample.update_descriptor(  # pylint: disable=no-member
                {'sample': {
                    'organism': 'Homo sapiens'
                }})
            bams.append(bam)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(bam.sample)

        return bams

    def get_macs(self, count=1, collection=None):
        """Return macs data objects.

        The objects are produced by calling ``macs`` (without
        background) on the samples of freshly uploaded bams.

        :param int count: number of bam objects to create as input
        :param collection: If defined, the bam data objects will be
            added to the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        bams = self.get_bams(count, collection)

        return macs([bam.sample for bam in bams], use_background=False)

    def get_cuffquants(self, count=1, collection=None):
        """Return cuffquant data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        cuffquant_path = os.path.join(FILES_PATH, 'dummy_cuffquant.cxb')
        collections = self._collection_kwargs(collection)

        cuffquants = []
        for _ in range(count):
            cuffquant = self.res.run('upload-cxb',
                                     input={
                                         'src': cuffquant_path,
                                         'source': 'NCBI'
                                     },
                                     **collections)

            cuffquants.append(cuffquant)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(cuffquant.sample)

        return cuffquants
Beispiel #8
0
    def setUp(self):
        """Connect as admin and upload the reads used by the tests.

        The connection is kept in ``self.res`` and the uploaded reads in
        ``self.reads``; the parent ``setUp`` runs afterwards.
        """
        admin_res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.res = admin_res

        self.reads = self.upload_reads(admin_res)
        super().setUp()
Beispiel #9
0
def upload_reads():
    """Upload NGS reads to the Resolwe server.

    Command-line entry point. Exactly one of ``-r`` (single-end) or the
    pair ``-r1``/``-r2`` (paired-end, equally long lists) must be given.
    All listed files are checked locally before the connection to the
    server is opened. On any usage or path error a message is printed
    and the process exits with status 1.
    """
    # Local import: ``sys.exit`` replaces the ``exit`` helper, which is
    # only installed by the ``site`` module.
    import sys

    description = """Upload single-end or paired-end NGS reads to the Resolwe server.

UPLOAD A SINGLE-END FASTQ FILE:
resolwe-upload-reads -r sample1.fastq.gz

UPLOAD A SET OF MULTI-LANE FASTQ FILES:
resolwe-upload-reads -r sample1_lane1.fastq.gz sample1_lane2.fastq.gz

UPLOAD A PAIR OF PAIRED-END READS FILES:
resolwe-upload-reads -r1 sample1_mate1.fastq.gz -r2 sample1_mate2.fastq.gz

UPLOAD ALL SINGLE-END READS IN A WORKING DIRECTORY:
for reads_file in *.fastq.gz
do
   resolwe-upload-reads -r ${reads_file}
done
"""
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,
                                     description=description)

    parser.add_argument('-a', '--address', default='https://torta.bcm.genialis.com',
                        help='Resolwe server address')
    parser.add_argument('-u', '--username', default='admin', help='Username')
    parser.add_argument('-p', '--password', default='admin', help='User password')
    parser.add_argument('-c', '--collection', nargs='*', type=int, help='Collection ID(s)')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose reporting')
    parser.add_argument('-r', metavar='READS-LANE-X', nargs='*',
                        help='Single-end reads (<read1_lane1 read1_lane2, ..>)')
    parser.add_argument('-r1', metavar='MATE-1-LANE-X', nargs='*',
                        help='Paired-end reads mate1 (<mate1_lane1 mate1_lane2, ..>)')
    parser.add_argument('-r2', metavar='MATE-2-LANE-X', nargs='*',
                        help='Paired-end reads mate2 (<mate2_lane1 mate2_lane2, ..>)')

    args = parser.parse_args()

    if args.verbose:
        resdk_logger.start_logging()

    # Either -r alone, or both -r1 and -r2 without -r.
    paired = bool(args.r1 and args.r2)
    if not (args.r or paired) or (args.r and (args.r1 or args.r2)):
        parser.print_help()
        print("\nERROR: define either -r or -r1 and -r2.\n")
        sys.exit(1)

    if not args.r and len(args.r1) != len(args.r2):
        parser.print_help()
        print("\nERROR: -r1 and -r2 file list length must match\n")
        sys.exit(1)

    if args.r:
        slug = 'upload-fastq-single'
        inputs = {'src': args.r}
    else:
        slug = 'upload-fastq-paired'
        inputs = {'src1': args.r1, 'src2': args.r2}

    # Check the local files first so that a typo in a path does not cost
    # a round trip to the server.
    if not all(os.path.isfile(path) for paths in inputs.values() for path in paths):
        print("\nERROR: Incorrect file path(s).\n")
        sys.exit(1)

    resolwe = Resolwe(args.username, args.password, args.address)
    resolwe.run(slug, inputs, collections=args.collection)
Beispiel #10
0
# Tutorial script: connect to a Resolwe server, download sample files,
# inspect data objects and run an alignment process.
import resdk
from resdk import Resolwe

res = Resolwe('admin', 'admin', 'https://torta.bcm.genialis.com')

# Recommended: start logging
resdk.start_logging()

# Download the bam file of a single sample.
sample = res.sample.get(1)
sample.download(type='bam')

# Download the bam files of all samples of one organism.
samples = res.sample.filter(descriptor__organism="Homo sapiens")
for sample in samples:
    sample.download(type='bam')

# Print the process name of every data object of a sample.
sample = res.sample.get(1)
for data_id in sample.data:
    data = res.data.get(data_id)
    print(data.process_name)

# Download one named file from the first ROSE2 data object.
rose2_list = res.data.filter(type='data:chipseq:rose2:')
rose2 = rose2_list[0]
rose2.download(name='20150531-u266-A-H3K27Ac-ML1949_S2_R1_mapped_peaks_Plot_panel.png')

# Run an alignment with the hg19 genome and the sample's first data object.
genome = res.data.get('hg19')
genome_id = genome.id
reads_id = sample.data[0]
aligned = res.run('alignment-bowtie-2-2-3_trim', input={
                      'genome': genome_id,
                      'reads': reads_id,
                      'reporting': {'rep_mode': 'k', 'k_reports': 1}
                  })
# Bare attribute access: shows the status in an interactive session,
# prints nothing when run as a script.
aligned.status
Beispiel #11
0
class BaseResdkFunctionalTest(unittest.TestCase):
    """Base class for functional tests in ReSDK.

    It creates two ``Resolwe`` connections to the server: one with
    admin's credentials (``self.res``) and one with normal user's
    credentials (``self.user_res``).

    It also includes utility functions to generate data objects of basic
    types with dummy input files.

    """

    def setUp(self):
        """Open the admin and the normal-user connection."""
        self.res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.user_res = Resolwe(USER_USERNAME, USER_PASSWORD, URL)

    @staticmethod
    def _collection_kwargs(collection):
        """Return the ``collections`` keyword arguments for ``run``.

        An empty dict is returned for a falsy ``collection`` so that the
        keyword is omitted from the call altogether.
        """
        return {'collections': [collection]} if collection else {}

    def get_genome(self,
                   collection=None,
                   species='Homo sapiens',
                   build='hg38'):
        """Return genome data object.

        :param collection: If defined, the data object will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` input
        :param str build: value of the ``build`` input
        """
        genome_path = os.path.join(FILES_PATH, 'dummy_genome.fasta')

        return self.res.run('upload-genome',
                            input={
                                'src': genome_path,
                                'species': species,
                                'build': build,
                            },
                            **self._collection_kwargs(collection))

    def get_gtf(self, collection=None, species='Homo sapiens', build='hg38'):
        """Return gtf data object.

        :param collection: If defined, the data object will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` input
        :param str build: value of the ``build`` input
        """
        gtf_path = os.path.join(FILES_PATH, 'dummy_gtf.gtf')

        return self.res.run('upload-gtf',
                            input={
                                'src': gtf_path,
                                'source': 'NCBI',
                                'species': species,
                                'build': build,
                            },
                            **self._collection_kwargs(collection))

    def get_reads(self, count=1, collection=None):
        """Return reads data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        reads_path = os.path.join(FILES_PATH, 'dummy_reads.fastq')
        collections = self._collection_kwargs(collection)

        reads = []
        for _ in range(count):
            read = self.res.run('upload-fastq-single',
                                input={'src': reads_path},
                                **collections)
            reads.append(read)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            # NOTE(review): ``add_samples`` is called on ``collection``
            #       itself, so a plain int id would fail here.
            if collection:
                collection.add_samples(read.sample)

        return reads

    def get_multiplexed(self, count=1, collection=None):
        """Return demultiplexed reads data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        collections = self._collection_kwargs(collection)
        file_names = {
            'reads': 'pool24.read1.small.qseq.bz2',
            'reads2': 'pool24.read3.small.qseq.bz2',
            'barcodes': 'pool24.read2.small.qseq.bz2',
            'annotation': 'pool24.tsv',
        }
        # Turn the bare file names into paths inside the test files dir.
        inputs = {k: os.path.join(FILES_PATH, v) for k, v in file_names.items()}

        return [
            self.res.run('upload-multiplexed-paired', inputs, **collections)
            for _ in range(count)
        ]

    def get_bams(self,
                 count=1,
                 collection=None,
                 species='Homo sapiens',
                 build='hg38'):
        """Return bam data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` input
        :param str build: value of the ``build`` input
        """
        bam_path = os.path.join(FILES_PATH, 'dummy_bam.bam')
        collections = self._collection_kwargs(collection)

        bams = []
        for _ in range(count):
            bam = self.res.run('upload-bam',
                               input={
                                   'src': bam_path,
                                   'species': species,
                                   'build': build,
                               },
                               **collections)

            bams.append(bam)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(bam.sample)

        return bams

    def get_macs(self, count=1, collection=None):
        """Return macs data objects.

        The objects are produced by calling ``macs`` (without
        background) on the samples of freshly uploaded bams.

        :param int count: number of bam objects to create as input
        :param collection: If defined, the bam data objects will be
            added to the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        bams = self.get_bams(count, collection)

        return macs([bam.sample for bam in bams], use_background=False)

    def get_cuffquants(self,
                       count=1,
                       collection=None,
                       species='Homo sapiens',
                       build='hg38'):
        """Return cuffquant data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` input
        :param str build: value of the ``build`` input
        """
        cuffquant_path = os.path.join(FILES_PATH, 'dummy_cuffquant.cxb')
        collections = self._collection_kwargs(collection)

        cuffquants = []
        for _ in range(count):
            cuffquant = self.res.run('upload-cxb',
                                     input={
                                         'src': cuffquant_path,
                                         'source': 'NCBI',
                                         'species': species,
                                         'build': build,
                                     },
                                     **collections)

            cuffquants.append(cuffquant)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(cuffquant.sample)

        return cuffquants

    def get_expression(self,
                       count=1,
                       collection=None,
                       species='Homo sapiens',
                       build='hg38'):
        """Return expression data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` input
        :param str build: value of the ``build`` input
        """
        expression_path = os.path.join(FILES_PATH, 'dummy_expression.tab')
        collections = self._collection_kwargs(collection)

        expressions = []
        for _ in range(count):
            expression = self.res.run('upload-expression',
                                      input={
                                          'exp': expression_path,
                                          'exp_name': 'test_expression',
                                          'source': 'NCBI',
                                          'species': species,
                                          'build': build,
                                      },
                                      **collections)

            expressions.append(expression)

        return expressions
Beispiel #12
0
def sequp():
    """Auto-upload NGS reads from directory to the Resolwe server.

    Script checks if there are new reads or annotation files in the
    target directory tree. If both: reads and corresponding annotation
    files are present, upload the reads and set the initial annotation
    based on the annotation file.

    We want to upload files which have not been uploaded yet. We need
    to know the most recent modification date of all uploaded files.
    Files modified later are upload candidates. The timestamp of last
    modification time is stored in config_file.

    Server address, credentials and the observed directory are taken
    from the command line, then from the ``GENIALIS_*`` environment
    variables, then from built-in defaults.

    """
    # Local import: needed for ``sys.exit`` and the interpreter version
    # check in ``parse_annotation_file``.
    import sys

    # XXX: Saving the config_file in user_data_dir is probably not the
    # right decision. We want multiple users to be able to upload data
    # to the same directory - therefore the config_file should be set
    # for the system and not user dependant.

    # Application data
    config_file = os.path.join(appdirs.user_data_dir(about.__title__, about.__author__), 'config')
    # XXX: Increase to 1h
    change_time_window = 5

    parser = argparse.ArgumentParser(description='Auto-upload NGS reads from '
                                     'directory to the Resolwe server.')

    parser.add_argument('-a', '--address', help='Resolwe server address')
    parser.add_argument('-u', '--username', help='Username')
    parser.add_argument('-p', '--password', help='User password')
    parser.add_argument('-d', '--directory', help='Observed directory with reads')
    parser.add_argument('-f', '--force', action='store_true', help='Force upload of all files')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose reporting')

    args = parser.parse_args()

    if args.verbose:
        resdk_logger.start_logging()

    genialis_url = args.address or os.getenv('GENIALIS_URL') or 'http://localhost:8000'
    genialis_username = args.username or os.getenv('GENIALIS_USERNAME') or 'admin'
    genialis_pass = args.password or os.getenv('GENIALIS_PASS') or 'admin'
    genialis_seq_dir = args.directory or os.getenv('GENIALIS_SEQ_DIR') or os.path.expanduser('~')
    genialis_seq_dir = os.path.normpath(genialis_seq_dir)

    logger.info('Address: {}'.format(genialis_url))
    logger.info('User: {}'.format(genialis_username))
    logger.info('Pass: ******')
    logger.info('Directory: {}'.format(genialis_seq_dir))

    # Forcing an upload simply forgets every stored timestamp.
    if args.force and os.path.isfile(config_file):
        os.remove(config_file)

    def read_timestamps():
        """Read timestamps from config_file.

        :rtype: Dict of pairs (dir, timestamp)

        """
        if not os.path.isfile(config_file):
            return {}

        data = {}
        with open(config_file, 'r') as file_:
            for line in file_:
                parts = line.strip().split('\t')
                if len(parts) < 2:
                    # Blank or truncated line: nothing usable on it.
                    continue
                data[parts[0]] = float(parts[1])
        return data

    def write_timestamps(pairs):
        """Write timestamps to config_file."""
        with open(config_file, 'w') as file_:
            for first, second in pairs.items():
                file_.write(str(first) + '\t' + str(second) + '\n')

    def get_timestamp():
        """Get timestamp for GENIALIS_SEQ_DIR (0 if none is stored)."""
        return read_timestamps().get(genialis_seq_dir, 0)

    def set_timestamp(timestamp):
        """Set timestamp for GENIALIS_SEQ_DIR, creating config_file if needed."""
        # read_timestamps() returns {} when the config file does not exist.
        pairs = read_timestamps()
        pairs[genialis_seq_dir] = timestamp

        config_dir = os.path.dirname(config_file)
        try:
            os.makedirs(config_dir)
        except OSError:
            # Only an already existing directory is an acceptable failure.
            if not os.path.isdir(config_dir):
                raise
        write_timestamps(pairs)

    def find_files(patterns):
        """Return paths below genialis_seq_dir whose name matches a pattern."""
        found = []
        for root, _, filenames in os.walk(genialis_seq_dir):
            for pattern in patterns:
                for filename in fnmatch.filter(filenames, pattern):
                    found.append(os.path.join(root, filename))
        return found

    # Get timestamp
    timestamp = get_timestamp()

    # Find new reads: files modified after the stored timestamp
    read_file_extensions = ['*.fastq', '*.fastq.gz', '*.fq', '*.fq.gz']
    all_new_read_files = [path for path in find_files(read_file_extensions)
                          if os.path.getmtime(path) > timestamp]

    # Determine if the candidate files are fully uploaded by the
    # sequencer. The idea is that the file size does not change in a
    # defined time window (change_time_window).
    sizes1 = {f: os.path.getsize(f) for f in all_new_read_files}
    time.sleep(change_time_window)
    sizes2 = {f: os.path.getsize(f) for f in all_new_read_files}

    all_new_read_files_uploaded = set(os.path.normpath(f) for f in all_new_read_files
                                      if sizes1[f] == sizes2[f])

    # Find all annotation files
    annotation_file_extensions = ['*.csv', '*.txt', '*.tsv']
    all_annotation_files = find_files(annotation_file_extensions)

    def parse_annotation_file(annotation_file):
        """Parse annotation file to a dict of annotations keyed by sample name.

        Lines starting with ``#`` are ignored. A row is kept only when it
        has a ``FASTQ_PATH`` value and every file listed there exists
        (paths are taken relative to genialis_seq_dir).
        """
        anns = {}
        # Files with '\r' line endings must be readable too. Python 3
        # text mode translates newlines by default and rejects the 'U'
        # flag since 3.11; Python 2 needs the flag.
        mode = 'r' if sys.version_info[0] >= 3 else 'rU'
        with open(annotation_file, mode) as file_:
            try:
                reader = csv.DictReader([row for row in file_ if row[0] != '#'],
                                        delimiter=str('\t'))

                # One line is one annotation (one reads file)
                for row in reader:
                    # Add upper-cased copies of all keys. DictReader stores
                    # surplus fields under the key None, which is skipped.
                    row.update({k.upper(): v for k, v in row.items() if k is not None})

                    if not row.get('FASTQ_PATH'):
                        continue

                    # Built afresh for every row so that paths of a
                    # rejected row never leak into the next one.
                    seq_paths = [os.path.normpath(os.path.join(genialis_seq_dir, seqfile))
                                 for seqfile in row['FASTQ_PATH'].split(',')]

                    if all(os.path.isfile(sf) for sf in seq_paths):
                        row['FASTQ_PATH'] = ','.join(seq_paths)
                        anns[row['SAMPLE_NAME']] = row

            except csv.Error:
                logger.error("File type not supported")
                sys.exit(1)
        return anns

    # Write all annotations to a single dict keyed by sample name
    annotations = {}
    for ann_file in all_annotation_files:
        annotations.update(parse_annotation_file(ann_file))

    # Connect to Resolwe server
    resolwe = Resolwe(genialis_username, genialis_pass, genialis_url)

    read_schemas = resolwe.api.descriptorschema.get(slug='reads')
    read_schema = read_schemas[0] if read_schemas else None

    # Upload all files in all_new_read_files_uploaded with annotations
    uploaded_files = []

    for sample_n, annotation in annotations.items():
        fw_reads = annotation['FASTQ_PATH'].split(',')

        # Only samples whose forward reads are all new and complete.
        if not set(fw_reads).issubset(all_new_read_files_uploaded):
            continue

        input_ = {}
        descriptor, descriptor_schema = None, None

        if read_schema:
            descriptor_schema = read_schema['slug']
            # ``or`` also covers columns that are present but empty/None.
            barcode_removed = (annotation.get('BARCODE_REMOVED') or 'N').strip().upper()
            exp_type = EXPERIMENT_TYPE.get((annotation.get('SEQ_TYPE') or '').upper(), '')
            descriptor = {
                'reads_info': {
                    'barcode': annotation.get('BARCODE', None),
                    'barcode_removed': barcode_removed == 'Y',
                    'instrument_type': annotation.get('INSTRUMENT', None),
                    'seq_date': annotation.get('SEQ_DATE', None)
                }
            }
            if exp_type:
                descriptor['experiment_type'] = exp_type

        # Paired-end reads
        if annotation.get('PAIRED_END') == 'Y' and annotation.get('FASTQ_PATH_PAIR'):
            rw_reads = annotation['FASTQ_PATH_PAIR'].split(',')
            slug = 'upload-fastq-paired'
            input_['src1'] = fw_reads
            input_['src2'] = [os.path.join(genialis_seq_dir, f) for f in rw_reads]
            file_paths = input_['src1'] + input_['src2']

        # Single-end reads
        else:
            slug = 'upload-fastq-single'
            input_['src'] = fw_reads
            file_paths = input_['src']

        data = resolwe.run(slug,
                           input=input_,
                           descriptor=descriptor,
                           descriptor_schema=descriptor_schema,
                           data_name=sample_n)

        if not data:
            logger.error("Error uploading {}".format(sample_n))
            continue

        uploaded_files.extend(file_paths)

        sample = data.sample

        if 'sample' not in sample.descriptor:
            sample.descriptor['sample'] = {}

        organism = ORGANISMS.get((annotation.get('ORGANISM') or '').upper(), '')
        if organism:
            sample.descriptor['sample']['organism'] = organism

        sample.update_descriptor(sample.descriptor)

    # Set the modification timestamp to the newest uploaded file
    modif_times = [os.path.getmtime(f) for f in uploaded_files]
    if modif_times:
        set_timestamp(max(modif_times))
Beispiel #13
0
 def setUp(self):
     """Create the admin (``self.res``) and user (``self.user_res``) connections."""
     admin_connection = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
     self.res = admin_connection

     user_connection = Resolwe(USER_USERNAME, USER_PASSWORD, URL)
     self.user_res = user_connection
Beispiel #14
0
class BaseResdkFunctionalTest(unittest.TestCase):
    """Base class for functional tests in ReSDK.

    It creates two ``Resolwe`` objects for connection to the server: one
    with the admin's credentials (``self.res``) and one with a normal
    user's credentials (``self.user_res``).

    It also includes utility functions to generate data objects of basic
    types with dummy input files.

    """

    def setUp(self):
        """Create the admin and the normal-user ``Resolwe`` objects."""
        # Chain to the parent so that mixins / base classes in the MRO get
        # their own set-up executed as well.
        super().setUp()
        self.res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.user_res = Resolwe(USER_USERNAME, USER_PASSWORD, URL)

    @staticmethod
    def _collection_kwargs(collection):
        """Return the ``collections`` keyword argument for ``self.res.run``.

        An empty dict is returned when ``collection`` is falsy, so the
        result can always be unpacked into the call with ``**``.
        """
        return {'collections': [collection]} if collection else {}

    def get_genome(self, collection=None, species='Homo sapiens',
                   build='hg38'):
        """Return genome data object.

        :param collection: If defined, the data object will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value passed as the ``species`` input
        :param build: value passed as the ``build`` input
        """
        genome_path = os.path.join(FILES_PATH, 'dummy_genome.fasta')

        return self.res.run(
            'upload-genome',
            input={
                'src': genome_path,
                'species': species,
                'build': build,
            },
            **self._collection_kwargs(collection)
        )

    def get_gtf(self, collection=None, species='Homo sapiens',
                build='hg38'):
        """Return gtf data object.

        :param collection: If defined, the data object will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value passed as the ``species`` input
        :param build: value passed as the ``build`` input
        """
        gtf_path = os.path.join(FILES_PATH, 'dummy_gtf.gtf')

        return self.res.run(
            'upload-gtf',
            input={
                'src': gtf_path,
                'source': 'NCBI',
                'species': species,
                'build': build,
            },
            **self._collection_kwargs(collection)
        )

    def get_reads(self, count=1, collection=None):
        """Return reads data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        # NOTE(review): ``collection.add_samples`` is called below, so a
        # plain ``int`` id would presumably fail here - confirm.
        reads_path = os.path.join(FILES_PATH, 'dummy_reads.fastq')
        run_kwargs = self._collection_kwargs(collection)

        reads = []
        for _ in range(count):
            read = self.res.run(
                'upload-fastq-single',
                input={'src': reads_path},
                **run_kwargs
            )
            reads.append(read)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(read.sample)

        return reads

    def get_bams(self, count=1, collection=None, species='Homo sapiens',
                 build='hg38'):
        """Return bam data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value passed as the ``species`` input
        :param build: value passed as the ``build`` input
        """
        # NOTE(review): ``collection.add_samples`` is called below, so a
        # plain ``int`` id would presumably fail here - confirm.
        bam_path = os.path.join(FILES_PATH, 'dummy_bam.bam')
        run_kwargs = self._collection_kwargs(collection)

        bams = []
        for _ in range(count):
            bam = self.res.run(
                'upload-bam',
                input={
                    'src': bam_path,
                    'species': species,
                    'build': build,
                },
                **run_kwargs
            )

            bams.append(bam)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(bam.sample)

        return bams

    def get_macs(self, count=1, collection=None):
        """Return the result of ``macs`` run on freshly uploaded bams.

        ``count`` bam objects are uploaded with :meth:`get_bams` and their
        samples are passed to ``macs`` with ``use_background=False``.

        :param int count: number of bam objects to upload
        :param collection: If defined, the bam data objects will be added
            to the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        bams = self.get_bams(count, collection)

        return macs([bam.sample for bam in bams], use_background=False)

    def get_cuffquants(self, count=1, collection=None, species='Homo sapiens',
                       build='hg38'):
        """Return cuffquant data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value passed as the ``species`` input
        :param build: value passed as the ``build`` input
        """
        # NOTE(review): ``collection.add_samples`` is called below, so a
        # plain ``int`` id would presumably fail here - confirm.
        cuffquant_path = os.path.join(FILES_PATH, 'dummy_cuffquant.cxb')
        run_kwargs = self._collection_kwargs(collection)

        cuffquants = []
        for _ in range(count):
            cuffquant = self.res.run(
                'upload-cxb',
                input={
                    'src': cuffquant_path,
                    'source': 'NCBI',
                    'species': species,
                    'build': build,
                },
                **run_kwargs
            )

            cuffquants.append(cuffquant)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(cuffquant.sample)

        return cuffquants

    def get_expression(self, count=1, collection=None, species='Homo sapiens',
                       build='hg38'):
        """Return expression data objects.

        Unlike the other multi-object helpers, the samples are not
        explicitly added to ``collection`` here.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value passed as the ``species`` input
        :param build: value passed as the ``build`` input
        """
        expression_path = os.path.join(FILES_PATH, 'dummy_expression.tab')
        run_kwargs = self._collection_kwargs(collection)

        expressions = []
        for _ in range(count):
            expression = self.res.run(
                'upload-expression',
                input={
                    'exp': expression_path,
                    'exp_name': 'test_expression',
                    'source': 'NCBI',
                    'species': species,
                    'build': build,
                },
                **run_kwargs
            )

            expressions.append(expression)

        return expressions

    def set_slug(self, resource, slug):
        """Set slug of resource and save the resource."""
        resource.slug = slug
        resource.save()

    def make_public(self, resource, permissions=None):
        """Make resource public.

        :param permissions: permissions passed to
            ``resource.permissions.add_public``; defaults to ``['view']``
        """
        if permissions is None:
            permissions = ['view']
        resource.permissions.add_public(permissions)

    def set_slug_and_make_public(self, resource, slug, permissions=None):
        """Set slug of resource and make it public."""
        self.set_slug(resource, slug)
        self.make_public(resource, permissions=permissions)
Beispiel #15
0
 def setUp(self):
     """Build one ``Resolwe`` object per credential pair.

     ``self.res`` uses the admin credentials and ``self.user_res`` the
     normal user's credentials; both are created with the same ``URL``.
     """
     admin_login = (ADMIN_USERNAME, ADMIN_PASSWORD)
     self.res = Resolwe(*admin_login, URL)

     user_login = (USER_USERNAME, USER_PASSWORD)
     self.user_res = Resolwe(*user_login, URL)
Beispiel #16
0
class BaseResdkFunctionalTest(unittest.TestCase):
    """Base class for functional tests in ReSDK.

    It creates two ``Resolwe`` objects for connection to the server: one
    with the admin's credentials (``self.res``) and one with a normal
    user's credentials (``self.user_res``).

    It also includes utility functions to generate data objects of basic
    types with dummy input files.

    """
    def setUp(self):
        """Create the admin and the normal-user ``Resolwe`` objects."""
        # Chain to the parent so that mixins / base classes in the MRO get
        # their own set-up executed as well.
        super().setUp()
        self.res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.user_res = Resolwe(USER_USERNAME, USER_PASSWORD, URL)

    @staticmethod
    def _collection_kwargs(collection):
        """Return the ``collections`` keyword argument for ``self.res.run``.

        An empty dict is returned when ``collection`` is falsy, so the
        result can always be unpacked into the call with ``**``.
        """
        return {'collections': [collection]} if collection else {}

    def get_genome(self,
                   collection=None,
                   species='Homo sapiens',
                   build='hg38'):
        """Return genome data object.

        :param collection: If defined, the data object will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value passed as the ``species`` input
        :param build: value passed as the ``build`` input
        """
        genome_path = os.path.join(FILES_PATH, 'dummy_genome.fasta')

        return self.res.run('upload-genome',
                            input={
                                'src': genome_path,
                                'species': species,
                                'build': build,
                            },
                            **self._collection_kwargs(collection))

    def get_gtf(self, collection=None, species='Homo sapiens', build='hg38'):
        """Return gtf data object.

        :param collection: If defined, the data object will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value passed as the ``species`` input
        :param build: value passed as the ``build`` input
        """
        gtf_path = os.path.join(FILES_PATH, 'dummy_gtf.gtf')

        return self.res.run('upload-gtf',
                            input={
                                'src': gtf_path,
                                'source': 'NCBI',
                                'species': species,
                                'build': build,
                            },
                            **self._collection_kwargs(collection))

    def get_reads(self, count=1, collection=None):
        """Return reads data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        # NOTE(review): ``collection.add_samples`` is called below, so a
        # plain ``int`` id would presumably fail here - confirm.
        reads_path = os.path.join(FILES_PATH, 'dummy_reads.fastq')
        run_kwargs = self._collection_kwargs(collection)

        reads = []
        for _ in range(count):
            read = self.res.run('upload-fastq-single',
                                input={'src': reads_path},
                                **run_kwargs)
            reads.append(read)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(read.sample)

        return reads

    def get_bams(self,
                 count=1,
                 collection=None,
                 species='Homo sapiens',
                 build='hg38'):
        """Return bam data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value passed as the ``species`` input
        :param build: value passed as the ``build`` input
        """
        # NOTE(review): ``collection.add_samples`` is called below, so a
        # plain ``int`` id would presumably fail here - confirm.
        bam_path = os.path.join(FILES_PATH, 'dummy_bam.bam')
        run_kwargs = self._collection_kwargs(collection)

        bams = []
        for _ in range(count):
            bam = self.res.run('upload-bam',
                               input={
                                   'src': bam_path,
                                   'species': species,
                                   'build': build,
                               },
                               **run_kwargs)

            bams.append(bam)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(bam.sample)

        return bams

    def get_macs(self, count=1, collection=None):
        """Return the result of ``macs`` run on freshly uploaded bams.

        ``count`` bam objects are uploaded with :meth:`get_bams` and their
        samples are passed to ``macs`` with ``use_background=False``.

        :param int count: number of bam objects to upload
        :param collection: If defined, the bam data objects will be added
            to the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        bams = self.get_bams(count, collection)

        return macs([bam.sample for bam in bams], use_background=False)

    def get_cuffquants(self,
                       count=1,
                       collection=None,
                       species='Homo sapiens',
                       build='hg38'):
        """Return cuffquant data objects.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value passed as the ``species`` input
        :param build: value passed as the ``build`` input
        """
        # NOTE(review): ``collection.add_samples`` is called below, so a
        # plain ``int`` id would presumably fail here - confirm.
        cuffquant_path = os.path.join(FILES_PATH, 'dummy_cuffquant.cxb')
        run_kwargs = self._collection_kwargs(collection)

        cuffquants = []
        for _ in range(count):
            cuffquant = self.res.run('upload-cxb',
                                     input={
                                         'src': cuffquant_path,
                                         'source': 'NCBI',
                                         'species': species,
                                         'build': build,
                                     },
                                     **run_kwargs)

            cuffquants.append(cuffquant)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(cuffquant.sample)

        return cuffquants

    def get_expression(self,
                       count=1,
                       collection=None,
                       species='Homo sapiens',
                       build='hg38'):
        """Return expression data objects.

        Unlike the other multi-object helpers, the samples are not
        explicitly added to ``collection`` here.

        :param int count: number of objects to return
        :param collection: If defined, the data objects will be added to
            the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param species: value passed as the ``species`` input
        :param build: value passed as the ``build`` input
        """
        expression_path = os.path.join(FILES_PATH, 'dummy_expression.tab')
        run_kwargs = self._collection_kwargs(collection)

        expressions = []
        for _ in range(count):
            expression = self.res.run('upload-expression',
                                      input={
                                          'exp': expression_path,
                                          'exp_name': 'test_expression',
                                          'source': 'NCBI',
                                          'species': species,
                                          'build': build,
                                      },
                                      **run_kwargs)

            expressions.append(expression)

        return expressions

    def set_slug(self, resource, slug):
        """Set slug of resource and save the resource."""
        resource.slug = slug
        resource.save()

    def make_public(self, resource, permissions=None):
        """Make resource public.

        :param permissions: permissions passed to
            ``resource.permissions.add_public``; defaults to ``['view']``
        """
        if permissions is None:
            permissions = ['view']
        resource.permissions.add_public(permissions)

    def set_slug_and_make_public(self, resource, slug, permissions=None):
        """Set slug of resource and make it public."""
        self.set_slug(resource, slug)
        self.make_public(resource, permissions=permissions)
 def __init__(self, username=None, password=None, url=DEFAULT_URL):
     """Build a ``Resolwe`` object and keep it as ``self._res``.

     The three arguments are forwarded positionally to ``Resolwe``.
     """
     connection = Resolwe(username, password, url)
     self._res = connection
    data = Table(os.path.join(URL_REMOTE, filename))

    if '.tab.gz' in filename:
        filename = filename.replace('.tab.gz', '.pickle')

    data.save(filename)

    dataset = res.run('data-table-upload', input={'src': filename})

    # dataset = res.data.get(id=1)
    annotations['tabular']['file_name'] = filename
    annotations['tabular']['file_size'] = os.stat(filename).st_size

    # descriptor schema slug
    dataset.descriptor_schema = 'data_info'

    dataset.descriptor = annotations
    dataset.save()

    # cleanup
    os.remove(filename)


if __name__ == '__main__':
    res = Resolwe('admin', 'admin123', 'http://127.0.0.1:8000/')
    # upload('aml-1k.pickle')

    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(upload, sc_file[0]) for sc_file in SC_FILES]