Python Resolwe.run 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: resdk

클래스/타입: Resolwe

메소드/함수: run

hotexamples.com에서의 예제들: 7

Python Resolwe.run - 7개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, 평가가 가장 높은 resdk.Resolwe.run(Python)의 실제 사용 예제입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Resolwe(12)

run(5)

자주 사용되는 메소드들

Resolwe (12)

run (5)

예제 #1

파일 보기

class BaseResdkFunctionalTest(unittest.TestCase):
    """Base class for functional tests in ReSDK.

    It creates two ``Resolwe`` connections to the server: one with the
    admin's credentials (``self.res``) and one with a normal user's
    credentials (``self.user_res``).

    It also includes utility methods that upload dummy input files
    through the admin connection to generate data objects of basic
    types.

    """

    def setUp(self):
        """Open the admin and the normal-user connection to the server."""
        self.res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.user_res = Resolwe(USER_USERNAME, USER_PASSWORD, URL)

    def get_genome(self,
                   collection=None,
                   species='Homo sapiens',
                   build='hg38'):
        """Return genome data object.

        Runs the ``upload-genome`` process on the dummy genome file.

        :param collection: If defined, data object will be added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` input field
        :param str build: value of the ``build`` input field
        """
        genome_path = os.path.join(FILES_PATH, 'dummy_genome.fasta')
        # Only pass ``collections`` to ``run`` when a collection was given.
        collections = {'collections': [collection]} if collection else {}

        return self.res.run('upload-genome',
                            input={
                                'src': genome_path,
                                'species': species,
                                'build': build,
                            },
                            **collections)

    def get_gtf(self, collection=None, species='Homo sapiens', build='hg38'):
        """Return GTF annotation data object.

        Runs the ``upload-gtf`` process on the dummy GTF file.

        :param collection: If defined, data object will be added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` input field
        :param str build: value of the ``build`` input field
        """
        gtf_path = os.path.join(FILES_PATH, 'dummy_gtf.gtf')
        collections = {'collections': [collection]} if collection else {}

        return self.res.run('upload-gtf',
                            input={
                                'src': gtf_path,
                                'source': 'NCBI',
                                'species': species,
                                'build': build,
                            },
                            **collections)

    def get_reads(self, count=1, collection=None):
        """Return reads data objects.

        Runs the ``upload-fastq-single`` process ``count`` times on the
        dummy reads file.

        :param int count: number of objects to return
        :param collection: If defined, data objects and their samples
            will be added to the given collection.
        :type collection: None or `~resdk.resources.Collection` (an int
            id cannot be used here because ``collection.add_samples`` is
            called on it)
        :return: list of the uploaded data objects
        """
        reads_path = os.path.join(FILES_PATH, 'dummy_reads.fastq')
        collections = {'collections': [collection]} if collection else {}

        reads = []
        for _ in range(count):
            read = self.res.run('upload-fastq-single',
                                input={'src': reads_path},
                                **collections)
            reads.append(read)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(read.sample)

        return reads

    def get_bams(self,
                 count=1,
                 collection=None,
                 species='Homo sapiens',
                 build='hg38'):
        """Return bam data objects.

        Runs the ``upload-bam`` process ``count`` times on the dummy BAM
        file.

        :param int count: number of objects to return
        :param collection: If defined, data objects and their samples
            will be added to the given collection.
        :type collection: None or `~resdk.resources.Collection` (an int
            id cannot be used here because ``collection.add_samples`` is
            called on it)
        :param str species: value of the ``species`` input field
        :param str build: value of the ``build`` input field
        :return: list of the uploaded data objects
        """
        bam_path = os.path.join(FILES_PATH, 'dummy_bam.bam')
        collections = {'collections': [collection]} if collection else {}

        bams = []
        for _ in range(count):
            bam = self.res.run('upload-bam',
                               input={
                                   'src': bam_path,
                                   'species': species,
                                   'build': build,
                               },
                               **collections)

            bams.append(bam)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(bam.sample)

        return bams

    def get_macs(self, count=1, collection=None):
        """Return macs data objects.

        Uploads ``count`` BAM files with :meth:`get_bams` (default
        species and build) and passes their samples to ``macs`` with
        ``use_background=False``.

        :param int count: number of BAM objects to upload
        :param collection: If defined, the uploaded BAM objects and
            their samples will be added to the given collection.
        :type collection: None or `~resdk.resources.Collection`
        """
        bams = self.get_bams(count, collection)

        return macs([bam.sample for bam in bams], use_background=False)

    def get_cuffquants(self,
                       count=1,
                       collection=None,
                       species='Homo sapiens',
                       build='hg38'):
        """Return cuffquant data objects.

        Runs the ``upload-cxb`` process ``count`` times on the dummy
        cuffquant file.

        :param int count: number of objects to return
        :param collection: If defined, data objects and their samples
            will be added to the given collection.
        :type collection: None or `~resdk.resources.Collection` (an int
            id cannot be used here because ``collection.add_samples`` is
            called on it)
        :param str species: value of the ``species`` input field
        :param str build: value of the ``build`` input field
        :return: list of the uploaded data objects
        """
        cuffquant_path = os.path.join(FILES_PATH, 'dummy_cuffquant.cxb')
        collections = {'collections': [collection]} if collection else {}

        cuffquants = []
        for _ in range(count):
            cuffquant = self.res.run('upload-cxb',
                                     input={
                                         'src': cuffquant_path,
                                         'source': 'NCBI',
                                         'species': species,
                                         'build': build,
                                     },
                                     **collections)

            cuffquants.append(cuffquant)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(cuffquant.sample)

        return cuffquants

    def get_expression(self,
                       count=1,
                       collection=None,
                       species='Homo sapiens',
                       build='hg38'):
        """Return expression data objects.

        Runs the ``upload-expression`` process ``count`` times on the
        dummy expression file.

        :param int count: number of objects to return
        :param collection: If defined, data objects will be added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` input field
        :param str build: value of the ``build`` input field
        :return: list of the uploaded data objects
        """
        expression_path = os.path.join(FILES_PATH, 'dummy_expression.tab')
        collections = {'collections': [collection]} if collection else {}

        expressions = []
        for _ in range(count):
            expression = self.res.run('upload-expression',
                                      input={
                                          'exp': expression_path,
                                          'exp_name': 'test_expression',
                                          'source': 'NCBI',
                                          'species': species,
                                          'build': build,
                                      },
                                      **collections)

            expressions.append(expression)

        return expressions

    def set_slug(self, resource, slug):
        """Assign ``slug`` to ``resource`` and save the resource."""
        resource.slug = slug
        resource.save()

    def make_public(self, resource, permissions=None):
        """Make resource public.

        :param resource: object exposing ``permissions.add_public``
        :param permissions: permissions to grant publicly; defaults to
            ``['view']`` when not given
        """
        if permissions is None:
            permissions = ['view']
        resource.permissions.add_public(permissions)

    def set_slug_and_make_public(self, resource, slug, permissions=None):
        """Set slug of resource and make it public.

        Combines :meth:`set_slug` and :meth:`make_public`, in that order.
        """
        self.set_slug(resource, slug)
        self.make_public(resource, permissions=permissions)

예제 #2

파일 보기

파일: base.py 프로젝트: rafecooks/resolwe-bio-py

class BaseResdkFunctionalTest(unittest.TestCase):
    """Base class for functional tests in ReSDK.

    It creates two ``Resolwe`` connections to the server: one with the
    admin's credentials (``self.res``) and one with a normal user's
    credentials (``self.user_res``).

    It also includes utility methods that upload dummy input files
    through the admin connection to generate data objects of basic
    types.

    """

    def setUp(self):
        """Open the admin and the normal-user connection to the server."""
        self.res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.user_res = Resolwe(USER_USERNAME, USER_PASSWORD, URL)

    def get_genome(self, collection=None):
        """Return genome data object.

        Runs the ``upload-genome`` process on the dummy genome file.

        :param collection: If defined, data object will be added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        genome_path = os.path.join(FILES_PATH, 'dummy_genome.fasta')
        # Only pass ``collections`` to ``run`` when a collection was given.
        collections = {'collections': [collection]} if collection else {}

        return self.res.run('upload-genome',
                            input={'src': genome_path},
                            **collections)

    def get_gtf(self, collection=None):
        """Return GTF annotation data object.

        Runs the ``upload-gtf`` process on the dummy GTF file.

        :param collection: If defined, data object will be added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        gtf_path = os.path.join(FILES_PATH, 'dummy_gtf.gtf')
        collections = {'collections': [collection]} if collection else {}

        return self.res.run('upload-gtf',
                            input={
                                'src': gtf_path,
                                'source': 'NCBI'
                            },
                            **collections)

    def get_reads(self, count=1, collection=None):
        """Return reads data objects.

        Runs the ``upload-fastq-single`` process ``count`` times on the
        dummy reads file.

        :param int count: number of objects to return
        :param collection: If defined, data objects and their samples
            will be added to the given collection.
        :type collection: None or `~resdk.resources.Collection` (an int
            id cannot be used here because ``collection.add_samples`` is
            called on it)
        :return: list of the uploaded data objects
        """
        reads_path = os.path.join(FILES_PATH, 'dummy_reads.fastq')
        collections = {'collections': [collection]} if collection else {}

        reads = []
        for _ in range(count):
            read = self.res.run('upload-fastq-single',
                                input={'src': reads_path},
                                **collections)
            reads.append(read)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(read.sample)

        return reads

    def get_bams(self, count=1, collection=None):
        """Return bam data objects.

        Runs the ``upload-bam`` process ``count`` times on the dummy BAM
        file and sets the organism in each resulting sample's
        descriptor.

        :param int count: number of objects to return
        :param collection: If defined, data objects and their samples
            will be added to the given collection.
        :type collection: None or `~resdk.resources.Collection` (an int
            id cannot be used here because ``collection.add_samples`` is
            called on it)
        :return: list of the uploaded data objects
        """
        bam_path = os.path.join(FILES_PATH, 'dummy_bam.bam')
        collections = {'collections': [collection]} if collection else {}

        bams = []
        for _ in range(count):
            bam = self.res.run('upload-bam',
                               input={'src': bam_path},
                               **collections)
            bam.sample.update_descriptor(  # pylint: disable=no-member
                {'sample': {
                    'organism': 'Homo sapiens'
                }})
            bams.append(bam)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(bam.sample)

        return bams

    def get_macs(self, count=1, collection=None):
        """Return macs data objects.

        Uploads ``count`` BAM files with :meth:`get_bams` and passes
        their samples to ``macs`` with ``use_background=False``.

        :param int count: number of BAM objects to upload
        :param collection: If defined, the uploaded BAM objects and
            their samples will be added to the given collection.
        :type collection: None or `~resdk.resources.Collection`
        """
        bams = self.get_bams(count, collection)

        return macs([bam.sample for bam in bams], use_background=False)

    def get_cuffquants(self, count=1, collection=None):
        """Return cuffquant data objects.

        Runs the ``upload-cxb`` process ``count`` times on the dummy
        cuffquant file.

        :param int count: number of objects to return
        :param collection: If defined, data objects and their samples
            will be added to the given collection.
        :type collection: None or `~resdk.resources.Collection` (an int
            id cannot be used here because ``collection.add_samples`` is
            called on it)
        :return: list of the uploaded data objects
        """
        cuffquant_path = os.path.join(FILES_PATH, 'dummy_cuffquant.cxb')
        collections = {'collections': [collection]} if collection else {}

        cuffquants = []
        for _ in range(count):
            cuffquant = self.res.run('upload-cxb',
                                     input={
                                         'src': cuffquant_path,
                                         'source': 'NCBI'
                                     },
                                     **collections)

            cuffquants.append(cuffquant)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(cuffquant.sample)

        return cuffquants

예제 #3

파일 보기

def upload_reads():
    """Upload NGS reads to the Resolwe server.

    Command-line entry point. Parses the command-line arguments, checks
    that either ``-r`` (single-end) or both ``-r1`` and ``-r2``
    (paired-end) are given and that all listed files exist, then runs the
    ``upload-fastq-single`` or ``upload-fastq-paired`` process.

    All input validation happens before the connection to the server is
    opened, so invalid invocations fail without any network access.

    :raises SystemExit: with status 1 when the read options are missing
        or combined incorrectly, when the ``-r1``/``-r2`` lists differ in
        length, or when any given path is not an existing file
    """
    # Local import: ``sys.exit`` is always available, whereas the bare
    # ``exit`` helper is only injected by the ``site`` module.
    import sys

    description = """Upload single-end or paired-end NGS reads to the Resolwe server.

UPLOAD A SINGLE-END FASTQ FILE:
resolwe-upload-reads -r sample1.fastq.gz

UPLOAD A SET OF MULTI-LANE FASTQ FILES:
resolwe-upload-reads -r sample1_lane1.fastq.gz sample1_lane2.fastq.gz

UPLOAD A PAIR OF PAIRED-END READS FILES:
resolwe-upload-reads -r1 sample1_mate1.fastq.gz -r2 sample1_mate2.fastq.gz

UPLOAD ALL SINGLE-END READS IN A WORKING DIRECTORY:
for reads_file in *.fastq.gz
do
   resolwe-upload-reads -r ${reads_file}
done
"""
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,
                                     description=description)

    parser.add_argument('-a', '--address', default='https://torta.bcm.genialis.com',
                        help='Resolwe server address')
    parser.add_argument('-u', '--username', default='admin', help='Username')
    parser.add_argument('-p', '--password', default='admin', help='User password')
    parser.add_argument('-c', '--collection', nargs='*', type=int, help='Collection ID(s)')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose reporting')
    parser.add_argument('-r', metavar='READS-LANE-X', nargs='*',
                        help='Single-end reads (<read1_lane1 read1_lane2, ..>)')
    parser.add_argument('-r1', metavar='MATE-1-LANE-X', nargs='*',
                        help='Paired-end reads mate1 (<mate1_lane1 mate1_lane2, ..>)')
    parser.add_argument('-r2', metavar='MATE-2-LANE-X', nargs='*',
                        help='Paired-end reads mate2 (<mate2_lane1 mate2_lane2, ..>)')

    args = parser.parse_args()

    if args.verbose:
        resdk_logger.start_logging()

    # Exactly one mode must be selected: -r alone, or -r1 together with -r2.
    if not (args.r or (args.r1 and args.r2)) or (args.r and (args.r1 or args.r2)):
        parser.print_help()
        print("\nERROR: define either -r or -r1 and -r2.\n")
        sys.exit(1)

    # Paired-end mode needs one mate2 file for every mate1 file.
    if not args.r and len(args.r1) != len(args.r2):
        parser.print_help()
        print("\nERROR: -r1 and -r2 file list length must match\n")
        sys.exit(1)

    # Validate every path up front, before connecting to the server.
    read_files = args.r if args.r else args.r1 + args.r2
    if not all(os.path.isfile(path) for path in read_files):
        print("\nERROR: Incorrect file path(s).\n")
        sys.exit(1)

    resolwe = Resolwe(args.username, args.password, args.address)

    if args.r:
        resolwe.run('upload-fastq-single', {'src': args.r}, collections=args.collection)
    else:
        resolwe.run('upload-fastq-paired', {'src1': args.r1, 'src2': args.r2},
                    collections=args.collection)

예제 #4

파일 보기

파일: resdk-example.py 프로젝트: tjanez/resolwe-bio-py

# Print the process name of every data object attached to sample 1.
sample = res.sample.get(1)
for data_id in sample.data:
    data = res.data.get(data_id)
    # Function-call form works on both Python 2 and Python 3.
    print(data.process_name)

# Download one output file of the first ROSE2 data object.
rose2_list = res.data.filter(type='data:chipseq:rose2:')
rose2 = rose2_list[0]
rose2.download(name='20150531-u266-A-H3K27Ac-ML1949_S2_R1_mapped_peaks_Plot_panel.png')

# Align the sample's first data object against the 'hg19' genome.
genome = res.data.get('hg19')
genome_id = genome.id
reads_id = sample.data[0]
aligned = res.run('alignment-bowtie-2-2-3_trim', input={
                      'genome': genome_id,
                      'reads': reads_id,
                      'reporting': {'rep_mode': 'k', 'k_reports': 1}
                  })
# A bare ``aligned.status`` expression only shows a value in an
# interactive session; print it so the script reports the status too.
print(aligned.status)

# Refresh the object and report the status again.
aligned.update()
print(aligned.status)

###################

collection_2 = res.collection.get(2)
# collection_2 is a resdk Collection object with id=2

sample_12 = res.sample.get('wt-rep1-10w')
# sample_12 is a resdk Sample object with slug='wt-rep1-10w'

예제 #5

파일 보기

class BaseResdkFunctionalTest(unittest.TestCase):
    """Base class for functional tests in ReSDK.

    It creates two ``Resolwe`` connections to the server: one with the
    admin's credentials (``self.res``) and one with a normal user's
    credentials (``self.user_res``).

    It also includes utility methods that upload dummy input files
    through the admin connection to generate data objects of basic
    types.

    """

    def setUp(self):
        """Open the admin and the normal-user connection to the server."""
        self.res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.user_res = Resolwe(USER_USERNAME, USER_PASSWORD, URL)

    def get_genome(self,
                   collection=None,
                   species='Homo sapiens',
                   build='hg38'):
        """Return genome data object.

        Runs the ``upload-genome`` process on the dummy genome file.

        :param collection: If defined, data object will be added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` input field
        :param str build: value of the ``build`` input field
        """
        genome_path = os.path.join(FILES_PATH, 'dummy_genome.fasta')
        # Only pass ``collections`` to ``run`` when a collection was given.
        collections = {'collections': [collection]} if collection else {}

        return self.res.run('upload-genome',
                            input={
                                'src': genome_path,
                                'species': species,
                                'build': build,
                            },
                            **collections)

    def get_gtf(self, collection=None, species='Homo sapiens', build='hg38'):
        """Return GTF annotation data object.

        Runs the ``upload-gtf`` process on the dummy GTF file.

        :param collection: If defined, data object will be added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` input field
        :param str build: value of the ``build`` input field
        """
        gtf_path = os.path.join(FILES_PATH, 'dummy_gtf.gtf')
        collections = {'collections': [collection]} if collection else {}

        return self.res.run('upload-gtf',
                            input={
                                'src': gtf_path,
                                'source': 'NCBI',
                                'species': species,
                                'build': build,
                            },
                            **collections)

    def get_reads(self, count=1, collection=None):
        """Return reads data objects.

        Runs the ``upload-fastq-single`` process ``count`` times on the
        dummy reads file.

        :param int count: number of objects to return
        :param collection: If defined, data objects and their samples
            will be added to the given collection.
        :type collection: None or `~resdk.resources.Collection` (an int
            id cannot be used here because ``collection.add_samples`` is
            called on it)
        :return: list of the uploaded data objects
        """
        reads_path = os.path.join(FILES_PATH, 'dummy_reads.fastq')
        collections = {'collections': [collection]} if collection else {}

        reads = []
        for _ in range(count):
            read = self.res.run('upload-fastq-single',
                                input={'src': reads_path},
                                **collections)
            reads.append(read)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(read.sample)

        return reads

    def get_multiplexed(self, count=1, collection=None):
        """Return multiplexed paired-end reads data objects.

        Runs the ``upload-multiplexed-paired`` process ``count`` times on
        the dummy ``pool24`` read, barcode and annotation files.

        :param int count: number of objects to return
        :param collection: If defined, data objects will be added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :return: list of the uploaded data objects
        """
        collections = {'collections': [collection]} if collection else {}
        inputs = {
            'reads': 'pool24.read1.small.qseq.bz2',
            'reads2': 'pool24.read3.small.qseq.bz2',
            'barcodes': 'pool24.read2.small.qseq.bz2',
            'annotation': 'pool24.tsv',
        }
        # Turn the bare file names into paths inside the test files folder.
        inputs = {k: os.path.join(FILES_PATH, v) for k, v in inputs.items()}

        multi = [
            self.res.run('upload-multiplexed-paired', inputs, **collections)
            for _ in range(count)
        ]
        return multi

    def get_bams(self,
                 count=1,
                 collection=None,
                 species='Homo sapiens',
                 build='hg38'):
        """Return bam data objects.

        Runs the ``upload-bam`` process ``count`` times on the dummy BAM
        file.

        :param int count: number of objects to return
        :param collection: If defined, data objects and their samples
            will be added to the given collection.
        :type collection: None or `~resdk.resources.Collection` (an int
            id cannot be used here because ``collection.add_samples`` is
            called on it)
        :param str species: value of the ``species`` input field
        :param str build: value of the ``build`` input field
        :return: list of the uploaded data objects
        """
        bam_path = os.path.join(FILES_PATH, 'dummy_bam.bam')
        collections = {'collections': [collection]} if collection else {}

        bams = []
        for _ in range(count):
            bam = self.res.run('upload-bam',
                               input={
                                   'src': bam_path,
                                   'species': species,
                                   'build': build,
                               },
                               **collections)

            bams.append(bam)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(bam.sample)

        return bams

    def get_macs(self, count=1, collection=None):
        """Return macs data objects.

        Uploads ``count`` BAM files with :meth:`get_bams` (default
        species and build) and passes their samples to ``macs`` with
        ``use_background=False``.

        :param int count: number of BAM objects to upload
        :param collection: If defined, the uploaded BAM objects and
            their samples will be added to the given collection.
        :type collection: None or `~resdk.resources.Collection`
        """
        bams = self.get_bams(count, collection)

        return macs([bam.sample for bam in bams], use_background=False)

    def get_cuffquants(self,
                       count=1,
                       collection=None,
                       species='Homo sapiens',
                       build='hg38'):
        """Return cuffquant data objects.

        Runs the ``upload-cxb`` process ``count`` times on the dummy
        cuffquant file.

        :param int count: number of objects to return
        :param collection: If defined, data objects and their samples
            will be added to the given collection.
        :type collection: None or `~resdk.resources.Collection` (an int
            id cannot be used here because ``collection.add_samples`` is
            called on it)
        :param str species: value of the ``species`` input field
        :param str build: value of the ``build`` input field
        :return: list of the uploaded data objects
        """
        cuffquant_path = os.path.join(FILES_PATH, 'dummy_cuffquant.cxb')
        collections = {'collections': [collection]} if collection else {}

        cuffquants = []
        for _ in range(count):
            cuffquant = self.res.run('upload-cxb',
                                     input={
                                         'src': cuffquant_path,
                                         'source': 'NCBI',
                                         'species': species,
                                         'build': build,
                                     },
                                     **collections)

            cuffquants.append(cuffquant)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            if collection:
                collection.add_samples(cuffquant.sample)

        return cuffquants

    def get_expression(self,
                       count=1,
                       collection=None,
                       species='Homo sapiens',
                       build='hg38'):
        """Return expression data objects.

        Runs the ``upload-expression`` process ``count`` times on the
        dummy expression file.

        :param int count: number of objects to return
        :param collection: If defined, data objects will be added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` input field
        :param str build: value of the ``build`` input field
        :return: list of the uploaded data objects
        """
        expression_path = os.path.join(FILES_PATH, 'dummy_expression.tab')
        collections = {'collections': [collection]} if collection else {}

        expressions = []
        for _ in range(count):
            expression = self.res.run('upload-expression',
                                      input={
                                          'exp': expression_path,
                                          'exp_name': 'test_expression',
                                          'source': 'NCBI',
                                          'species': species,
                                          'build': build,
                                      },
                                      **collections)

            expressions.append(expression)

        return expressions

예제 #6

파일 보기

파일: sequp.py 프로젝트: tristanbrown/resolwe-bio-py

def sequp():
    """Auto-upload NGS reads from directory to the Resolwe server.

    Script checks if there are new reads or annotation files in the
    target directory tree. If both: reads and corresponding annotation
    files are present, upload the reads and set the initial annotation
    based on the annotation file.

    We want to upload files which have not been uploaded yet. We need
    to know the most recent modification date of all uploaded files.
    Files modified later are upload candidates. The timestamp of last
    modification time is stored in config_file.

    Server address, credentials and the observed directory are taken
    from the command line first, then from the ``GENIALIS_URL``,
    ``GENIALIS_USERNAME``, ``GENIALIS_PASS`` and ``GENIALIS_SEQ_DIR``
    environment variables, and finally from built-in defaults.

    :raises SystemExit: with status 1 when an annotation file cannot be
        parsed by the ``csv`` module

    """
    # Local import: needed for ``sys.exit`` (the bare ``exit`` helper is
    # only injected by the ``site`` module) and for the version check in
    # ``parse_annotation_file``.
    import sys

    # XXX: Saving the config_file in user_data_dir is probably not the
    # right decision. We want multiple users to be able to upload data
    # to the same directory - therefore the config_file should be set
    # for the system and not user dependant.

    # Application data
    config_file = os.path.join(appdirs.user_data_dir(about.__title__, about.__author__), 'config')
    # XXX: Increase to 1h
    change_time_window = 5

    parser = argparse.ArgumentParser(description='Auto-upload NGS reads from '
                                     'directory to the Resolwe server.')

    parser.add_argument('-a', '--address', help='Resolwe server address')
    parser.add_argument('-u', '--username', help='Username')
    parser.add_argument('-p', '--password', help='User password')
    parser.add_argument('-d', '--directory', help='Observed directory with reads')
    parser.add_argument('-f', '--force', action='store_true', help='Force upload of all files')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose reporting')

    args = parser.parse_args()

    if args.verbose:
        resdk_logger.start_logging()

    # Precedence: command-line option, then environment variable, then default.
    genialis_url = args.address or os.getenv('GENIALIS_URL') or 'http://localhost:8000'
    genialis_username = args.username or os.getenv('GENIALIS_USERNAME') or 'admin'
    genialis_pass = args.password or os.getenv('GENIALIS_PASS') or 'admin'
    genialis_seq_dir = args.directory or os.getenv('GENIALIS_SEQ_DIR') or os.path.expanduser('~')
    genialis_seq_dir = os.path.normpath(genialis_seq_dir)

    logger.info('Address: {}'.format(genialis_url))
    logger.info('User: {}'.format(genialis_username))
    logger.info('Pass: ******')
    logger.info('Directory: {}'.format(genialis_seq_dir))

    # Removing the config file forgets all timestamps, so every file
    # becomes an upload candidate again.
    if args.force and os.path.isfile(config_file):
        os.remove(config_file)

    def read_timestamps():
        """Read timestamps from config_file.

        Lines that do not contain a tab-separated pair are skipped.

        :rtype: Dict of pairs (dir, timestamp)

        """
        if not os.path.isfile(config_file):
            return {}

        data = {}
        with open(config_file, 'r') as file_:
            for line in file_:
                parts = line.strip().split('\t')
                if len(parts) < 2:
                    # Blank or malformed line: nothing to read.
                    continue
                data[parts[0]] = float(parts[1])
        return data

    def write_timestamps(pairs):
        """Write timestamps to config_file."""
        with open(config_file, 'w') as file_:
            for first, second in pairs.items():
                file_.write(str(first) + '\t' + str(second) + '\n')

    def get_timestamp():
        """Get timestamp for GENIALIS_SEQ_DIR (0 if none is stored)."""
        timestamps = read_timestamps()
        return timestamps.get(genialis_seq_dir, 0)

    def set_timestamp(timestamp):
        """Set timestamp for GENIALIS_SEQ_DIR.

        Timestamps stored for other directories are kept; the config
        file and its folder are created when they do not exist yet.
        """
        # ``read_timestamps`` returns an empty dict when there is no file.
        pairs = read_timestamps()
        pairs[genialis_seq_dir] = timestamp

        config_dir = os.path.dirname(config_file)
        if not os.path.isdir(config_dir):
            os.makedirs(config_dir)

        write_timestamps(pairs)

    # Get timestamp
    timestamp = get_timestamp()

    # Find new reads
    all_new_read_files = []
    read_file_extensions = ['*.fastq', '*.fastq.gz', '*.fq', '*.fq.gz']

    for root, _, filenames in os.walk(genialis_seq_dir):
        for extension in read_file_extensions:
            for filename in fnmatch.filter(filenames, extension):
                path = os.path.join(root, filename)
                if os.path.getmtime(path) > timestamp:
                    all_new_read_files.append(path)

    # Determine if the candidate files are fully uploaded by the
    # sequencer. The idea is that the file size does not change in a
    # defined time window (change_time_window).
    sizes1 = {f: os.path.getsize(f) for f in all_new_read_files}
    time.sleep(change_time_window)
    sizes2 = {f: os.path.getsize(f) for f in all_new_read_files}

    all_new_read_files_uploaded = [os.path.normpath(f) for f in all_new_read_files if
                                   sizes1[f] == sizes2[f]]
    # Built once here; used for the subset test of every sample below.
    stable_read_files = set(all_new_read_files_uploaded)

    # Find all annotation files
    all_annotation_files = []
    annotation_file_extensions = ['*.csv', '*.txt', '*.tsv']
    for root, _, filenames in os.walk(genialis_seq_dir):
        for extension in annotation_file_extensions:
            for filename in fnmatch.filter(filenames, extension):
                all_annotation_files.append(os.path.join(root, filename))

    def parse_annotation_file(annotation_file):
        """Parse annotation file to a dict of annotations.

        Lines starting with ``#`` are ignored. A row is kept only when
        every file listed in its ``FASTQ_PATH`` column exists (relative
        to the observed directory).

        :return: dict mapping ``SAMPLE_NAME`` to the annotation row, with
            ``FASTQ_PATH`` rewritten to normalized paths
        """
        anns = {}
        # Files with '\r' line endings must be readable. Python 2 needs the
        # 'U' flag for that; on Python 3 it is the default behavior of text
        # mode, and the 'U' flag itself was removed in Python 3.11.
        read_mode = 'rU' if sys.version_info[0] < 3 else 'r'
        with open(annotation_file, read_mode) as file_:
            try:
                reader = csv.DictReader([row for row in file_ if row[0] != '#'],
                                        delimiter=str('\t'))

                # One line is one annotation (one reads file)
                for row in reader:
                    # Add upper-case copies of all keys. ``None`` is the key
                    # DictReader uses for surplus fields; it has no upper().
                    row.update({k.upper(): v for k, v in row.items() if k is not None})

                    if row.get('FASTQ_PATH'):
                        # Start from an empty list for every row, so paths of
                        # a previously rejected row cannot leak into this one.
                        seq_paths = [
                            os.path.normpath(os.path.join(genialis_seq_dir, seqfile))
                            for seqfile in row['FASTQ_PATH'].split(',')
                        ]

                        if all(os.path.isfile(sf) for sf in seq_paths):
                            row['FASTQ_PATH'] = ','.join(seq_paths)
                            anns[row['SAMPLE_NAME']] = row

            except csv.Error:
                logger.error("File type not supported")
                sys.exit(1)
        return anns

    # Write all annotations to single dict with sample names as keys
    annotations = {}
    for ann_file in all_annotation_files:
        annotations.update(parse_annotation_file(ann_file))

    # Connect to Resolwe server
    resolwe = Resolwe(genialis_username, genialis_pass, genialis_url)

    read_schemas = resolwe.api.descriptorschema.get(slug='reads')
    read_schema = read_schemas[0] if read_schemas else None

    # Upload all files in all_new_read_files_uploaded with annotations
    uploaded_files = []

    for sample_n, annotation in annotations.items():
        input_ = {}
        fw_reads = annotation['FASTQ_PATH'].split(',')

        # Upload only samples whose reads are all new and fully written.
        if not set(fw_reads).issubset(stable_read_files):
            continue

        descriptor, descriptor_schema = None, None

        if read_schema:
            descriptor_schema = read_schema['slug']
            # ``or`` fallbacks cover both a missing column and an empty cell.
            barcode_removed = (annotation.get('BARCODE_REMOVED') or 'N').strip().upper()
            exp_type = EXPERIMENT_TYPE.get((annotation.get('SEQ_TYPE') or '').upper(), '')
            descriptor = {
                'reads_info': {
                    'barcode': annotation.get('BARCODE', None),
                    'barcode_removed': barcode_removed == 'Y',
                    'instrument_type': annotation.get('INSTRUMENT', None),
                    'seq_date': annotation.get('SEQ_DATE', None)
                }
            }
            if exp_type:
                descriptor['experiment_type'] = exp_type

        # Paired-end reads
        if annotation.get('PAIRED_END') == 'Y' and annotation.get('FASTQ_PATH_PAIR'):
            rw_reads = annotation['FASTQ_PATH_PAIR'].split(',')
            slug = 'upload-fastq-paired'
            input_['src1'] = fw_reads
            input_['src2'] = [os.path.join(genialis_seq_dir, f) for f in rw_reads]
            file_path = input_['src1'] + input_['src2']

        # Single-end reads
        else:
            slug = 'upload-fastq-single'
            input_['src'] = fw_reads
            file_path = input_['src']

        data = resolwe.run(slug,
                           input=input_,
                           descriptor=descriptor,
                           descriptor_schema=descriptor_schema,
                           data_name=sample_n)

        if not data:
            logger.error("Error uploading {}".format(sample_n))
            continue

        uploaded_files.extend(file_path)

        sample = data.sample

        if 'sample' not in sample.descriptor:
            sample.descriptor['sample'] = {}

        organism = ORGANISMS.get((annotation.get('ORGANISM') or '').upper(), '')
        if organism:
            sample.descriptor['sample']['organism'] = organism

        sample.update_descriptor(sample.descriptor)

    # Set the modification timestamp to the newest uploaded file
    if uploaded_files:
        set_timestamp(max(os.path.getmtime(f) for f in uploaded_files))

예제 #7

파일 보기

파일: base.py 프로젝트: dblenkus/resolwe-bio-py

class BaseResdkFunctionalTest(unittest.TestCase):
    """Base class for functional tests in ReSDK.

    ``setUp`` creates 2 Resolwe connections to the server. One with
    admin's credentials (``self.res``) and one with normal user's
    credentials (``self.user_res``).

    It also includes utility functions to generate data objects of basic
    types with dummy input files. All uploads are run through the admin
    connection (``self.res``).

    """

    def setUp(self):
        """Create the admin and the normal-user connections."""
        self.res = Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.user_res = Resolwe(USER_USERNAME, USER_PASSWORD, URL)

    @staticmethod
    def _collection_kwargs(collection):
        """Build ``run`` kwargs that attach data to ``collection``."""
        return {'collections': [collection]} if collection else {}

    def _upload(self, slug, input_, collection=None):
        """Run process ``slug`` as admin and return what ``run`` returns."""
        return self.res.run(
            slug,
            input=input_,
            **self._collection_kwargs(collection)
        )

    def _upload_many(self, slug, input_, count, collection,
                     add_samples=True):
        """Run process ``slug`` ``count`` times and return the results.

        When ``add_samples`` is true and ``collection`` is given, the
        sample of every uploaded object is also added to the collection.
        """
        uploaded = []
        for _ in range(count):
            # Every run gets its own copy of the input dictionary.
            data = self._upload(slug, dict(input_), collection)
            uploaded.append(data)

            # TODO: Remove this when samples are automatically added to
            #       the collection in resolwe
            # NOTE(review): ``add_samples`` is called on ``collection``
            #       itself, so this presumably needs a Collection object
            #       rather than an int id - confirm.
            if add_samples and collection:
                collection.add_samples(data.sample)

        return uploaded

    def get_genome(self, collection=None, species='Homo sapiens',
                   build='hg38'):
        """Return genome data object.

        :param collection: If defined, data object will be added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` process input
        :param str build: value of the ``build`` process input
        """
        genome_path = os.path.join(FILES_PATH, 'dummy_genome.fasta')

        return self._upload(
            'upload-genome',
            {
                'src': genome_path,
                'species': species,
                'build': build,
            },
            collection,
        )

    def get_gtf(self, collection=None, species='Homo sapiens',
                build='hg38'):
        """Return gtf data object.

        :param collection: If defined, data object will be added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` process input
        :param str build: value of the ``build`` process input
        """
        gtf_path = os.path.join(FILES_PATH, 'dummy_gtf.gtf')

        return self._upload(
            'upload-gtf',
            {
                'src': gtf_path,
                'source': 'NCBI',
                'species': species,
                'build': build,
            },
            collection,
        )

    def get_reads(self, count=1, collection=None):
        """Return reads data objects.

        :param int count: number of objects to return
        :param collection: If defined, data objects and their samples
            will be added to the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        reads_path = os.path.join(FILES_PATH, 'dummy_reads.fastq')

        return self._upload_many(
            'upload-fastq-single',
            {'src': reads_path},
            count,
            collection,
        )

    def get_bams(self, count=1, collection=None, species='Homo sapiens',
                 build='hg38'):
        """Return bam data objects.

        :param int count: number of objects to return
        :param collection: If defined, data objects and their samples
            will be added to the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` process input
        :param str build: value of the ``build`` process input
        """
        bam_path = os.path.join(FILES_PATH, 'dummy_bam.bam')

        return self._upload_many(
            'upload-bam',
            {
                'src': bam_path,
                'species': species,
                'build': build,
            },
            count,
            collection,
        )

    def get_macs(self, count=1, collection=None):
        """Return macs data objects.

        Uploads ``count`` bam objects with ``get_bams`` and passes their
        samples to ``macs`` with ``use_background=False``.

        :param int count: number of bam objects to upload
        :param collection: If defined, the uploaded bam objects will be
            added to the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        """
        bams = self.get_bams(count, collection)

        return macs([bam.sample for bam in bams], use_background=False)

    def get_cuffquants(self, count=1, collection=None,
                       species='Homo sapiens', build='hg38'):
        """Return cuffquant data objects.

        :param int count: number of objects to return
        :param collection: If defined, data objects and their samples
            will be added to the given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` process input
        :param str build: value of the ``build`` process input
        """
        cuffquant_path = os.path.join(FILES_PATH, 'dummy_cuffquant.cxb')

        return self._upload_many(
            'upload-cxb',
            {
                'src': cuffquant_path,
                'source': 'NCBI',
                'species': species,
                'build': build,
            },
            count,
            collection,
        )

    def get_expression(self, count=1, collection=None,
                       species='Homo sapiens', build='hg38'):
        """Return expression data objects.

        :param int count: number of objects to return
        :param collection: If defined, data objects will be added to the
            given collection.
        :type collection: None, int or `~resdk.resources.Collection`
        :param str species: value of the ``species`` process input
        :param str build: value of the ``build`` process input
        """
        expression_path = os.path.join(FILES_PATH, 'dummy_expression.tab')

        # Unlike the other multi-object helpers, samples are not
        # explicitly added to the collection here.
        return self._upload_many(
            'upload-expression',
            {
                'exp': expression_path,
                'exp_name': 'test_expression',
                'source': 'NCBI',
                'species': species,
                'build': build,
            },
            count,
            collection,
            add_samples=False,
        )

    def set_slug(self, resource, slug):
        """Set slug of resource and save the resource."""
        resource.slug = slug
        resource.save()

    def make_public(self, resource, permissions=None):
        """Make resource public.

        :param permissions: permissions to grant to the public; defaults
            to ``['view']`` when not given.
        """
        if permissions is None:
            permissions = ['view']
        resource.permissions.add_public(permissions)

    def set_slug_and_make_public(self, resource, slug, permissions=None):
        """Set slug of resource and make it public."""
        self.set_slug(resource, slug)
        self.make_public(resource, permissions=permissions)