Code Example #1
File: consume.py Project: scanon/assembly
    def _get_data(self, body):
        params = json.loads(body)
        filepath = os.path.join(self.datapath, params['ARASTUSER'],
                                str(params['data_id']))
        datapath = filepath
        filepath += "/raw/"
        all_files = []
        user = params['ARASTUSER']
        job_id = params['job_id']
        data_id = params['data_id']
        token = params['oauth_token']
        uid = params['_id']

        with self.gc_lock:
            try:
                self.garbage_collect(self.datapath, self.min_free_space, user, job_id, data_id)
            except Exception:
                logger.error('Unexpected error in GC.')
                raise

        ##### Get data from ID #####
        data_doc = self.metadata.get_data_docs(params['ARASTUSER'], params['data_id'])
        if not data_doc:
            raise Exception('Invalid Data ID: {}'.format(params['data_id']))
        logger.debug('data_doc = {}'.format(data_doc))
        if 'kbase_assembly_input' in data_doc:
            params['assembly_data'] = kb_to_asm(data_doc['kbase_assembly_input'])
        elif 'assembly_data' in data_doc:
            params['assembly_data'] = data_doc['assembly_data']

        ##### Get data from assembly_data #####
        self.metadata.update_job(uid, 'status', 'Data transfer')
        with ignored(OSError):
            os.makedirs(filepath)
            touch(filepath)

        file_sets = params['assembly_data']['file_sets']
        for file_set in file_sets:
            if file_set['type'] == 'paired_url':
                file_set['type'] = 'paired'
            elif file_set['type'] == 'single_url':
                file_set['type'] = 'single'
            elif file_set['type'] == 'reference_url':
                file_set['type'] = 'reference'
            file_set['files'] = []  # legacy
            for file_info in file_set['file_infos']:
                #### File is stored on Shock or at a direct URL
                if file_info['filename']:
                    local_file = os.path.join(filepath, file_info['filename'])
                    if os.path.exists(local_file):
                        local_file = self.extract_file(local_file)
                        logger.info("Requested data exists on node: {}".format(local_file))
                    else:
                        local_file = self.download_shock(file_info['shock_url'], user, token,
                                                         file_info['shock_id'], filepath)

                elif file_info['direct_url']:
                    local_file = os.path.join(filepath, os.path.basename(file_info['direct_url']))
                    if os.path.exists(local_file):
                        local_file = self.extract_file(local_file)
                        logger.info("Requested data exists on node: {}".format(local_file))
                    else:
                        local_file = self.download_url(file_info['direct_url'], filepath, token=token)
                file_info['local_file'] = local_file
                if file_set['type'] == 'single' and asm.is_long_read_file(local_file):
                    if 'tags' not in file_set:
                        file_set['tags'] = []
                    if 'long_read' not in file_set['tags']:
                        file_set['tags'].append('long_read')  # PacBio or Nanopore reads
                file_set['files'].append(local_file)  # legacy
            all_files.append(file_set)
        return datapath, all_files
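
For reference, here is a hypothetical minimal message body illustrating the structure _get_data expects. The key names (ARASTUSER, job_id, data_id, oauth_token, _id, assembly_data, file_sets, file_infos) are taken from the code above; every value is invented for illustration only.

import json

# Hypothetical payload sketch; key names come from the code above, all values are made up.
body = json.dumps({
    "ARASTUSER": "alice",
    "job_id": "42",
    "data_id": "7",
    "oauth_token": "<oauth-token>",
    "_id": "<job-uid>",
    "assembly_data": {
        "file_sets": [{
            "type": "paired_url",             # normalized to "paired" by _get_data
            "file_infos": [{
                "filename": "reads_1.fastq",  # set when the file lives on Shock
                "shock_url": "http://shock.example.org",
                "shock_id": "<shock-node-id>",
                "direct_url": None,           # set instead for direct-URL downloads
            }],
        }],
    },
})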
Code Example #2
File: router.py Project: kbase/assembly
    def _makedirs(self, path):
        with ignored(OSError):
            os.makedirs(path)
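
Both examples lean on an ignored context manager defined elsewhere in the project. A minimal sketch, assuming it follows the standard contextlib recipe:

from contextlib import contextmanager

@contextmanager
def ignored(*exceptions):
    # Suppress the listed exception types; anything else propagates.
    try:
        yield
    except exceptions:
        pass

Since Python 3.4 the standard library ships this pattern as contextlib.suppress, and for the directory case specifically os.makedirs(path, exist_ok=True) tolerates an existing directory, though unlike ignored(OSError) it still raises on other OS-level failures such as permission errors.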