Example #1
    def __init__(self):
        self.rma = RmaApi()

        # TODO: only load this, if the file does not exist!
        self.PlaneOfSections = self.rma.json_msg_query(
            url="http://api.brain-map.org/api/v2/data/query.json?criteria=model::PlaneOfSection,rma::options[num_rows$eqall]"
        )
Example #2
 def __init__(self, archive_dir=None):
     self.bp = BiophysicalApi('http://api.brain-map.org')
     self.bp.cache_stimulus = True # change to False to not download the large stimulus NWB file
     self.cta = CellTypesApi()
     self.rma = RmaApi()
     self.neuronal_model_download_endpoint = 'http://celltypes.brain-map.org/neuronal_model/download/'
     self.template_names = {}
     self.nwb_list = []
     
     if archive_dir is None:
         archive_dir = '.'
     self.archive_dir = archive_dir
Example #3
class BiophysicalArchiver(object):
    def __init__(self, archive_dir=None):
        self.bp = BiophysicalApi('http://api.brain-map.org')
        self.bp.cache_stimulus = True # change to False to not download the large stimulus NWB file
        self.cta = CellTypesApi()
        self.rma = RmaApi()
        self.neuronal_model_download_endpoint = 'http://celltypes.brain-map.org/neuronal_model/download/'
        self.template_names = {}
        self.nwb_list = []
        
        if archive_dir is None:
            archive_dir = '.'
        self.archive_dir = archive_dir
    
    def get_template_names(self):
        template_response = self.rma.model_query('NeuronalModelTemplate')
        self.template_names = { t['id']: str(t['name']).replace(' ', '_') for t in template_response}
    
    def get_cells(self):
        return self.cta.list_cells(True, True)
    
    def get_neuronal_models(self, specimen_ids):
        return self.rma.model_query('NeuronalModel',
                                    criteria='specimen[id$in%s]' % ','.join(str(i) for i in specimen_ids),
                                    include='specimen',
                                    num_rows='all')
    
    def get_stimulus_file(self, neuronal_model_id):
        result = self.rma.model_query('NeuronalModel',
                                      criteria='[id$eq%d]' % (neuronal_model_id),
                                      include="specimen(ephys_result(well_known_files(well_known_file_type[name$il'NWB*'])))",
                                      tabular=['path'])
        
        stimulus_filename = result[0]['path']
        
        return stimulus_filename
    
    def archive_cell(self, ephys_result_id, specimen_id, template, neuronal_model_id):
        url = self.neuronal_model_download_endpoint + "/%d" % (neuronal_model_id)
        file_name = os.path.join(self.archive_dir, 'ephys_result_%d_specimen_%d_%s_neuronal_model_%d.zip' % (ephys_result_id,
                                                                                                             specimen_id,
                                                                                                             template,
                                                                                                             neuronal_model_id))
        self.rma.retrieve_file_over_http(url, file_name)
        nwb_file = self.get_stimulus_file(neuronal_model_id)
        shutil.copy(nwb_file, self.archive_dir) 
        self.nwb_list.append("%s\t%s" % (os.path.basename(nwb_file),
                                         file_name))
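
A minimal usage sketch for BiophysicalArchiver (not from the original source; the archive directory name is illustrative):

archiver = BiophysicalArchiver(archive_dir='archive')
archiver.get_template_names()
cells = archiver.get_cells()
models = archiver.get_neuronal_models([cell['id'] for cell in cells])
# each model record would then be passed to archive_cell() together with its
# ephys result id, specimen id, template name, and neuronal model id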
Example #4

    def getAsync(self, from_cache, aggregations):
        # load data once with from_cache = False, then change it to True to read it
        # from disk instead of fetching it from the API
        #print('HumanMicroarrayData.get() start')
        if not from_cache:
            # we use the RmaApi to query specific information, such as the section data sets of a specific gene
            # for docs, see: https://alleninstitute.github.io/AllenSDK/allensdk.api.queries.rma_api.html
            rma = RmaApi()

            # ok, so we don't need to do multiple requests to forward data from a model to a service, but simply use the pipe-concept:
            # http://help.brain-map.org/display/api/Service+Pipelines
            # e.g. this finds all probes for gabra4 and then queries the microarray-expression data for these probes. note that variables generated by a pipe are referenced by $variableName

            # check out this playground: http://api.brain-map.org/examples/rma_builder/index.html
            # we only use the product 'Human Microarray', which has the id 2 (id$eq2).
            query = (
                "http://api.brain-map.org/api/v2/data/query.json?criteria="
                f"model::Probe,rma::criteria,gene[acronym$il{self.geneAcronym}],products[id$eq2],rma::options[num_rows$eqall],"
                "pipe::list[probes$eq'id'],"
                "service::human_microarray_expression[probes$eq$probes]")

            data = rma.json_msg_query(url=query)

            data = self.transformExpressionData(data)

            structure_map = StructureMap.StructureMap(
                reference_space_key='annotation/ccf_2017',
                resolution=25).get(structure_graph_id=10)  # , annotation, meta

            # https://stackoverflow.com/questions/19125091/pandas-merge-how-to-avoid-duplicating-columns
            # to avoid automatic renaming the duplicate columns by removing any duplicate-column
            # note that our merge-condition is index vs structure_id. because structure_id is the index of structure_map,
            # it is not identified as a duplicate column.
            data = data[data.columns.difference(structure_map.columns)]

            ret = Utils.merge_with_structure(data, structure_map,
                                             HumanMicroarrayData.VALUE_COLUMNS,
                                             aggregations)

            Utils.save(ret, self.cache_path, 'cache.pkl')

            return {'human': ret}

        else:
            if not glob.glob(self.cache_path):
                Utils.log.warning(
                    f"No cached dataframe found. Check whether you have access to file '{self.cache_path}' and whether it exists. Obtaining data without caching now..."
                )
                return self.get(False, aggregations)

            #print('HumanMicroarrayData.get() done')
            return {'human': Utils.load(self.cache_path + 'cache.pkl')}
Example #5
    def __init__(self, master):
        rma = RmaApi()

        self.structgraph = pd.DataFrame(
            rma.model_query('Structure',
                            criteria='[graph_id$eq1]',
                            num_rows='all'))
        volume = 'App_File/annotation_25.nrrd'

        self.readdata, header = nrrd.read(volume)

        Frame.__init__(self, master)
        self.master = master
        self.init_window()
Example #6
from allensdk.api.queries.rma_api import RmaApi
from allensdk.core import json_utilities

def QueryAPI(model,
             criteriaString,
             includeString="",
             optionsString="",
             writeOut=None):
    # Initiate RMA API for Allen data retrieval
    api = RmaApi()
    # Settings for retrieval
    rows = []
    blockSize = 2000
    done = False
    startRow = 0
    # for i in range(0, total_rows, blockSize):

    while not done:
        print "Row %d, attempting to retrieve %d rows..." % (startRow,
                                                             blockSize)

        tot_rows = len(rows)
        if len(includeString) == 0:
            rows += api.model_query(model=model,
                                    criteria=criteriaString,
                                    options=optionsString,
                                    start_row=startRow,
                                    num_rows=blockSize)
        else:
            rows += api.model_query(model=model,
                                    criteria=criteriaString,
                                    include=includeString,
                                    options=optionsString,
                                    start_row=startRow,
                                    num_rows=blockSize)

        numRows = len(
            rows) - tot_rows  # additional rows retrieved on running the query
        startRow += numRows

        print "%d rows retrieved." % numRows

        # Check if we're at the end of the road
        if numRows == 0 or numRows < blockSize:
            done = True

        # Write out the results as they come in, if requested
        if isinstance(writeOut, str):
            json_utilities.write(writeOut, rows)
            print("Wrote to %s" % writeOut)

    return rows
Example #7
from allensdk.api.queries.rma_api import RmaApi
from allensdk.core import json_utilities

def QueryAPI(model,
             criteriaString,
             includeString=None,
             optionsString=None,
             writeOut=None):
    # Send a query to the Allen API, and assemble results

    # Initiate RMA API for Allen data retrieval
    api = RmaApi()

    # Settings for retrieval
    rows = []
    blockSize = 2000
    done = False
    startRow = 0
    # for i in range(0, total_rows, blockSize):

    while not done:
        print("Row %d, attempting to retrieve %d rows..." %
              (startRow, blockSize))

        tot_rows = len(rows)

        # apiQueryPartial = partial(api.model_query,model=model,criteria=criteriaString,
        # startRow=startRow,num_rows=blockSize)

        rows += api.model_query(model=model,
                                criteria=criteriaString,
                                include=includeString,
                                options=optionsString,
                                start_row=startRow,
                                num_rows=blockSize)

        numRows = len(
            rows) - tot_rows  # additional rows retrieved on running the query
        startRow += numRows

        print("%d rows retrieved." % numRows)

        # Check if we're at the end of the road
        if numRows == 0 or numRows < blockSize:
            done = True

        # Write out the results to the given file as they come in, if requested
        if writeOut:
            json_utilities.write(writeOut, rows)
            print("Wrote to %s" % writeOut)

    return rows
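
A hypothetical call to the paginated helper above; the criteria string mirrors the graph_id filter used in other examples on this page:

# fetch every structure in structure graph 1, 2000 rows at a time
structures = QueryAPI(model='Structure', criteriaString='[graph_id$eq1]')
print("total structures: %d" % len(structures))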
Example #8
def rma():
    ju.read_url_get = \
        MagicMock(name='read_url_get',
                  return_value={'msg': _msg})
    json.dumps = \
        MagicMock(name='dumps')

    ju.read = \
        MagicMock(name='read',
                  return_value=_msg)

    pj.read_json = \
        MagicMock(name='read_json',
                  return_value=_pd_msg)

    pd.DataFrame.to_csv = \
        MagicMock(name='to_csv')

    pd.DataFrame.from_csv = \
        MagicMock(name='from_csv',
                  return_value=_csv_msg)
    
    os.makedirs = MagicMock(name='makedirs')

    return { 'ju_read_url_get': ju.read_url_get,
             'ju_write': ju.write,
             'ju_read': ju.read,
             'pj_read_json': pj.read_json,
             'to_csv': pd.DataFrame.to_csv,
             'from_csv': pd.DataFrame.from_csv,
             'os_makedirs': os.makedirs,
             'rma_api': RmaApi() }
Example #9
    def build_url(self, service_name, kwargs):
        '''Create a single stage RMA url from a service name and parameters.
        '''
        rma = RmaApi()
        fmt = kwargs.get('fmt', 'json')

        schema_entry = ConnectedServices._schema[service_name]

        params = []

        for parameter in schema_entry['parameters']:
            value = kwargs.get(parameter['name'], None)
            if value is not None:
                params.append((parameter['name'], value))

        service_stage = rma.service_stage(service_name, params)

        url = rma.build_query_url([service_stage], fmt)

        return url
Example #10
 def build_url(self, service_name, kwargs):
     '''Create a single stage RMA url from a service name and parameters.
     '''
     rma = RmaApi()
     fmt = kwargs.get('fmt', 'json')
     
     schema_entry = ConnectedServices._schema[service_name]
     
     params = []
     
     for parameter in schema_entry['parameters']:
         value = kwargs.get(parameter['name'], None)
         if value is not None:
             params.append((parameter['name'], value))
     
     service_stage = rma.service_stage(service_name,
                                       params)
     
     url = rma.build_query_url([service_stage], fmt)
     
     return url
Example #11
def generateQueryRows(model, criteria, options="[only$eq'genes.entrez_id,data_sets.id']",
                      include='structure,section_data_set(genes)', startRow=0, blockSize=2000, verbose=True):
    """
    GENERATOR: obtain object for iteratively obtaining rows from an AllenSDK Rma query.
    ARGUMENTS:
        model: the type of query (str)
        criteria: criteria for the query (str). Where a function to automatically obtain this string does not exist,
            please write one.
        options: options string for query
        include: include string for query
        startRow: at which row of the query result would you like to start retrieving data? (int)
        blockSize: how many rows would you like to retrieve with each iteration? (int)
        verbose: additional print out, as per usual (bool)
    DEPENDENCIES: Packages/modules/etc: RmaApi (allensdk.api.queries.rma_api)
        (1) RmaApi().model_query() ;
        1) allensdk front end for data access (class method)
    YIELDS: rows of data ([{}, {}...]; json.dump() compatible)
    """
    if verbose:
        print('Obtaining generator object for {} query'.format(model))
        print('Criteria: {}'.format(criteria))
        print('Options: {}'.format(options))
        print('Include: {}'.format(include))
    done = False
    api = RmaApi()
    while not done:
        if verbose:
            print('Querying rows {} - {}'.format(startRow, startRow + blockSize))
        queryResult = api.model_query(model=model, criteria=criteria, options=options,
                                      include=include, start_row=startRow, num_rows=blockSize)
        if isinstance(queryResult, str):
            raise ValueError('Query returned an error message. RMA query likely contains a syntactic error')
        retrieved = len(queryResult)
        if verbose:
            print('{} rows were found'.format(retrieved))
        startRow += retrieved
        if verbose:
            print('new start row: {}'.format(startRow))
        done = retrieved == 0
        yield queryResult
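
A hypothetical consumer of the generator above; the gene acronym, include, and options strings are illustrative:

rows = []
for block in generateQueryRows('SectionDataSet',
                               criteria="genes[acronym$eq'Pdyn'],products[id$eq1]",
                               include='genes',
                               options="[only$eq'id']"):
    rows += block  # each yielded block is a json.dump()-compatible list of dicts
print('retrieved {} rows in total'.format(len(rows)))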
Example #12
class AllenSdkHelper:
    def __init__(self):
        self.rma = RmaApi()

        # TODO: only load this, if the file does not exist!
        self.PlaneOfSections = self.rma.json_msg_query(
            url="http://api.brain-map.org/api/v2/data/query.json?criteria=model::PlaneOfSection,rma::options[num_rows$eqall]"
        )

        # path=Utils.makedir(f'cache\\models') + '\\PlaneOfSection.json',

    def getPlaneOfSections(self):
        return self.PlaneOfSections
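
A short usage sketch for the helper above:

helper = AllenSdkHelper()
planes = helper.getPlaneOfSections()  # e.g. records for 'coronal' and 'sagittal'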
Example #13
def rma5():
    ju.read_url_get = \
        MagicMock(name='read_url_get',
                  side_effect = [{'msg': _msg},
                                 {'msg': _msg},
                                 {'msg': _msg},
                                 {'msg': _msg},
                                 {'msg': _msg}])

    ju.read = \
        MagicMock(name='read',
                  return_value=[{'whatever': True},
                                {'whatever': True},
                                {'whatever': True},
                                {'whatever': True},
                                {'whatever': True}])

    pj.read_json = \
        MagicMock(name='read_json',
                  return_value=pd.DataFrame([{'whatever': True},
                                             {'whatever': True},
                                             {'whatever': True},
                                             {'whatever': True},
                                             {'whatever': True}]))

    pd.DataFrame.to_csv = \
        MagicMock(name='to_csv')

    pd.DataFrame.from_csv = \
        MagicMock(name='from_csv',
                  return_value=_csv_msg)
    
    os.makedirs = MagicMock(name='makedirs')

    return { 'ju_read_url_get': ju.read_url_get,
             #'ju_write': json.dumps,
             'ju_read': ju.read,
             'pj_read_json': pj.read_json,
             'to_csv': pd.DataFrame.to_csv,
             'from_csv': pd.DataFrame.from_csv,
             'os_makedirs': os.makedirs,
             'rma_api': RmaApi() }
Example #14
from allensdk.api.queries.rma_api import RmaApi

rma = RmaApi()

data = rma.model_query('Atlas', criteria="[name$il'*Mouse*']")
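
model_query returns the parsed 'msg' list of dicts; a quick sanity check (illustrative):

print(len(data), data[0]['name'] if data else 'no atlases found')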
Example #15
def rma():
    return RmaApi()
Example #16
 def get_hemispheres():
     return RmaApi().model_query(model='Hemisphere')
Example #17
 def get_hemispheres_excpt():
     return RmaApi().model_query(model='Hemisphere',
                                 excpt=['symbol'])
Example #18
def generate_layer_ontology(output):
    all_structs = []

    root = RmaApi().model_query("Structure",
                                criteria="[graph_id$eq1],[acronym$eqgrey]")[0]

    all_structs.append(root)

    layers = [{
        'id': 900000000,
        'acronym': 'Isocortex1',
        'name': 'Isocortex layer 1',
        'color_hex_triplet': '7fc97f'
    }, {
        'id': 900000001,
        'acronym': 'Isocortex2/3',
        'name': 'Isocortex layer 2/3',
        'color_hex_triplet': 'beaed4'
    }, {
        'id': 900000002,
        'acronym': 'Isocortex4',
        'name': 'Isocortex layer 4',
        'color_hex_triplet': 'fdc086'
    }, {
        'id': 900000003,
        'acronym': 'Isocortex5',
        'name': 'Isocortex layer 5',
        'color_hex_triplet': 'ffff99'
    }, {
        'id': 900000004,
        'acronym': 'Isocortex6a',
        'name': 'Isocortex layer 6a',
        'color_hex_triplet': '386cb0'
    }, {
        'id': 900000005,
        'acronym': 'Isocortex6b',
        'name': 'Isocortex layer 6b',
        'color_hex_triplet': 'f0027f'
    }]

    all_structs += layers

    for layer in layers:
        layer['structure_id_path'] = '/%d/%d/' % (root['id'], layer['id'])
        layer['parent_structure_id'] = root['id']

        structs = RmaApi().model_query("Structure",
                                       criteria="structure_sets[name$eq'%s']" %
                                       layer['name'])

        for struct in structs:
            struct['structure_id_path'] = '/%d/%d/%d/' % (
                root['id'], layer['id'], struct['id'])
            struct['color_hex_triplet'] = layer['color_hex_triplet']
            struct['parent_structure_id'] = layer['id']

        all_structs += structs

    # Generate structure similar to the one returned by the http://api.brain-map.org/api/v2/data/Structure/query.json
    content = {
        "msg": all_structs,
        "num_rows": len(all_structs),
        "start_row": 0,
        "success": True,
        "total_rows": len(all_structs)
    }

    ju.write(output, content)
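
A one-line usage sketch for the function above; the output filename is illustrative:

generate_layer_ontology('layer_ontology.json')  # writes a Structure-query-shaped JSON file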
Example #19

#===============================================================================
# example 1
#===============================================================================

from allensdk.api.queries.rma_api import RmaApi

rma = RmaApi()

data = rma.model_query('Atlas', criteria="[name$il'*Mouse*']")

#===============================================================================
# example 2
#===============================================================================

associations = ''.join(
    ['[id$eq1]', 'structure_graph(ontology),', 'graphic_group_labels'])

atlas_data = rma.model_query(
    'Atlas',
    include=associations,
    criteria=associations,
    only=[
        'atlases.id', 'atlases.name', 'atlases.image_type', 'ontologies.id',
        'ontologies.name', 'structure_graphs.id', 'structure_graphs.name',
        'graphic_group_labels.id', 'graphic_group_labels.name'
    ])

#===============================================================================
# example 3
#===============================================================================
Example #20
#===============================================================================
# example 1
#===============================================================================

from allensdk.api.queries.rma_api import RmaApi

rma = RmaApi()

data = rma.model_query('Atlas',
                        criteria="[name$il'*Mouse*']")

#===============================================================================
# example 2
#===============================================================================

associations = ''.join(['[id$eq1]',
                        'structure_graph(ontology),',
                        'graphic_group_labels'])

atlas_data = rma.model_query('Atlas',
                             include=associations,
                             criteria=associations,
                             only=['atlases.id',
                                   'atlases.name',
                                   'atlases.image_type',
                                   'ontologies.id',
                                   'ontologies.name',
                                   'structure_graphs.id',
                                   'structure_graphs.name',
                                   'graphic_group_labels.id',
                                   'graphic_group_labels.name'])
Example #21
    def getAsync(self, from_cache, aggregations):
        # load data once with from_cache = False, then change it to True to read it from disk instead of fetching it from the api
        if not from_cache:
            # we use the RmaApi to query specific information, such as the section data sets of a specific gene
            # for docs, see: https://alleninstitute.github.io/AllenSDK/allensdk.api.queries.rma_api.html
            rma = RmaApi()

            # there might be a way to retrieve data in higher resolution, as stated here (default is 25, 10 is also available - but resolution is ignored for download_gene_expression_grid_data)
            # https://alleninstitute.github.io/AllenSDK/_modules/allensdk/api/queries/grid_data_api.html
            # See `Downloading 3-D Projection Grid Data <http://help.brain-map.org/display/api/Downloading+3-D+Expression+Grid+Data#name="Downloading3-DExpressionGridData-DOWNLOADING3DPROJECTIONGRIDDATA">`_
            gdApi = GridDataApi()

            # http://api.brain-map.org/examples/rma_builder/index.html
            # http://api.brain-map.org/examples/rma_builder/rma_builder.html
            # https://allensdk.readthedocs.io/en/latest/data_api_client.html
            sectionDataSets = pd.DataFrame(
                rma.model_query(
                    model='SectionDataSet',
                    #! criteria="plane_of_section[name$eqcoronal]", note that saggital only spans the left hemisphere, so this is tough to compare with human data.
                    filters={'failed': 'false'},
                    include=f"genes[acronym$il{self.geneAcronym}],products[id$eq1]",  # $il = case-insensitive like | id 1 = adult mouse brain atlas (not developing!)
                    num_rows='all'))

            # model's documentation: http://api.brain-map.org/doc/SectionDataSet.html
            # https://community.brain-map.org/t/attempting-to-download-substructures-for-coronal-p56-mouse-atlas/174/2

            experiments = {}

            # http://help.brain-map.org/display/mousebrain/Documentation
            annotations = np.fromfile(Utils.getRelativeFilepath(
                "annotations\\P56_Mouse_gridAnnotation\\gridAnnotation.raw"),
                                      dtype="uint32")

            # https://community.brain-map.org/t/how-to-acquire-the-structure-label-for-the-expression-grid-data/150/4
            # for Mouse P56, structure_graph_id = 1 according to http://help.brain-map.org/display/api/Atlas+Drawings+and+Ontologies
            structure_map = StructureMap.StructureMap(
                reference_space_key='annotation/ccf_2017',
                resolution=25).get(structure_graph_id=1)  # , annotation, meta
            # from http://alleninstitute.github.io/AllenSDK/_static/examples/nb/reference_space.html#Downloading-an-annotation-volume

            # https://stackoverflow.com/questions/16476924/how-to-iterate-over-rows-in-a-dataframe-in-pandas
            for index, row in sectionDataSets.iterrows():
                exp_id = row['id']
                exp_path = f"cache\\mouse_ish-expr\\{exp_id}\\"

                try:
                    # https://community.brain-map.org/t/whole-mouse-brain-gene-expression-data/447/4
                    # explanation of what "energy" means here:
                    # expression density = sum of expressing pixels / sum of all pixels in division
                    # expression intensity = sum of expressing pixel intensity / sum of expressing pixels
                    # expression energy = expression intensity * expression density

                    gdApi.download_gene_expression_grid_data(
                        exp_id, GridDataApi.ENERGY, exp_path)

                    expression_levels = np.fromfile(exp_path + "energy.raw",
                                                    dtype=np.float32)

                    # According to the doc @ http://help.brain-map.org/display/api/Downloading+3-D+Expression+Grid+Data
                    # we have "A raw uncompressed float (32-bit) little-endian volume representing average expression energy per voxel.
                    # A value of "-1" represents no data. This file is returned by default if the volumes parameter is null."
                    data = pd.DataFrame({
                        Constants.EXPR_LVL: expression_levels,
                        "structure_id": annotations
                    })

                    # some expression_levels are assigned to a structure of id 0. same is true for Jure's approach.
                    # according to the Allen institue, this is just due to background-noise:
                    # https://community.brain-map.org/t/how-to-acquire-the-structure-label-for-the-expression-grid-data/150/4
                    # values of -1 mean "no value obtained", hence we filter them out:
                    data = data[(data[Constants.EXPR_LVL] != -1)
                                & (data.structure_id != 0)]

                    data[Constants.Z_SCORE] = Utils.z_score(
                        data[Constants.EXPR_LVL])

                    # https://stackoverflow.com/questions/31528819/using-merge-on-a-column-and-index-in-pandas
                    # https://stackoverflow.com/questions/45147100/pandas-drop-columns-with-all-nans

                    name = f'mouse_{exp_id}_{Constants.PlaneOfSections[row["plane_of_section_id"]]}'
                    data = Utils.merge_with_structure(
                        data, structure_map, MouseISHData.VALUE_COLUMNS,
                        aggregations)

                    Utils.save(data, self.cache_path, name + '.pkl')

                    experiments['mouse - ' + Constants.PlaneOfSections[
                        row["plane_of_section_id"]]] = data
                except Exception as e:
                    print(
                        f"Error retrieving mouse-ish experiment {exp_id}: {str(e)}"
                    )
                    raise e

            return experiments
        else:
            if not glob.glob(self.cache_path):
                Utils.log.warning(
                    f"No cached dataframe found. Check whether you have access to file '{self.cache_path}' and whether it exists. Obtaining data without caching now..."
                )
                return self.get(False, aggregations)

            return {
                'mouse - ' + Utils.getFilename(file).split('_')[2]:
                Utils.load(file)
                for file in glob.glob(f'{self.cache_path}/*.pkl')
            }
Example #22
class ISHFetcher:
    ''' A downloader object for Section Data Sets

    Methods
    -------

    find_id_ish:
        Returns the ids of Section Data Sets (a single gene experiment) sorted by qc time

    download_grid_all:
        Downloads all the expression energy 3d density files (200um grid) that satisfy the query

    download_grid_recent:
        Downloads the most recently qc-ed expression energy 3d density file (200um grid) that satisfies the query

    Attributes
    ----------
    rma:
        Rma Api instance
    gda:
        GridData Api instance
    res:
        results of the find_id_ish query
    '''
    def __init__(self) -> None:
        self.rma = RmaApi()
        self.gda = GridDataApi()
        self.res = None  # type: List

    def find_id_ish(self,
                    gene: str,
                    sag_or_cor: str = "sagittal",
                    adu_or_dev: str = "adult",
                    time_point: str = "P56") -> List:
        """Returns the ids of Section Data Sets (a single gene experiment)

        Args
        ----
        gene: str
            the gene to search for

        sag_or_cor: str (accepts * wild cards)
            `coronal` or `sagittal` or `*`

        adu_or_dev: str (accepts * wild cards)
            `adult`, `development`, `both`

        time_point: str (it will be automatically wildcarded)
            e.g. "P56", "E", "E13", "P"

        Returns
        -------
        list of ids:
            sorted from most recent to most ancient

        """

        if adu_or_dev == "adult" and "E" in time_point:
            raise ValueError("there is not adult with age %s" % time_point)

        if adu_or_dev == "adult":
            adu_or_dev = "Mouse"
        elif adu_or_dev == "development":
            adu_or_dev = "DevMouse"
        elif adu_or_dev == "both":
            adu_or_dev = "*Mouse"
        else:
            raise ValueError("adu_or_dev='%s' is not valid" % adu_or_dev)
        criteria = [
            "[failed$eq'false']",
            "reference_space[name$li'*%s*']" % time_point,
            "products[abbreviation$li'%s']" % adu_or_dev,
            "plane_of_section[name$li'%s']" % sag_or_cor,
            "genes[acronym$eq'%s']" % gene
        ]
        # include='reference_space',
        self.res = self.rma.model_query("SectionDataSet",
                                        criteria=','.join(criteria),
                                        only=["id", "qc_date"],
                                        num_rows='all')
        if isinstance(self.res, str):
            raise ValueError("Bad query! Server returned :\n%s" % self.res)

        if self.res == []:
            return []

        qc_date = []
        for i in self.res:
            if i["qc_date"] is None:
                qc_date.append('')
            else:
                qc_date.append(i["qc_date"])

        ix = np.argsort(qc_date)
        ix = ix[::-1]

        results = []
        for i in ix:
            results.append(int(self.res[i]["id"]))

        return results

    def download_grid_all(self,
                          gene: str,
                          folder: str = '../data',
                          sag_or_cor: str = "sagittal",
                          adu_or_dev: str = "adult",
                          time_point: str = "P56") -> None:
        """Dowloads all the files

         Args
        ----
        gene: str
            the gene to search for

        sag_or_cor: str (accepts * wild cards)
            `coronal` or `sagittal` or `*`

        adu_or_dev: str (accepts * wild cards)
            `adult`, `development`, `both`

        time_point: str (it will be automatically wildcarded)
            e.g. "P56", "E", "E13", "P"

        """
        ids = self.find_id_ish(gene,
                               sag_or_cor=sag_or_cor,
                               adu_or_dev=adu_or_dev,
                               time_point=time_point)
        for idd in ids:
            self.gda.download_expression_grid_data(
                idd,
                path=os.path.join(
                    folder,
                    "%s_%s_%s_%s.zip" % (gene, sag_or_cor, time_point, idd)))

    def download_grid_recent(self,
                             gene: str,
                             folder: str = '../data',
                             sag_or_cor: str = "sagittal",
                             adu_or_dev: str = "adult",
                             time_point: str = "P56") -> Union[str, bool]:
        """Dowloads the most recently qc-ed file among the ones available

         Args
        ----
        gene: str
            the gene to search for

        sag_or_cor: str (accepts * wild cards)
            `coronal` or `sagittal` or `*`

        adu_or_dev: str (accepts * wild cards)
            `adult`, `development`, `both`

        time_point: str (it will be autmatically wildcarded)
            e.g. "P56", "E", "E13", "P"

        Returns
        -------
        output_path: str or bool
            if the download was successful, returns the path to the file; otherwise False

        """
        ids = self.find_id_ish(gene,
                               sag_or_cor=sag_or_cor,
                               adu_or_dev=adu_or_dev,
                               time_point=time_point)
        try:
            idd = ids[0]
            output_path = os.path.join(
                folder,
                "%s_%s_%s_%s.zip" % (gene, sag_or_cor, time_point, idd))
            self.gda.download_expression_grid_data(idd, path=output_path)
            return output_path
        except IndexError:
            logging.warning("Experiment %s was never performed" % gene)
            return False
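
A minimal usage sketch for ISHFetcher; the gene and folder are illustrative:

fetcher = ISHFetcher()
# ids of all adult P56 sagittal experiments for the gene, newest qc first
ids = fetcher.find_id_ish('Calb1')
# download only the most recently qc-ed grid file; returns False if none exist
path = fetcher.download_grid_recent('Calb1', folder='data')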
Example #23
 def __init__(self) -> None:
     self.rma = RmaApi()
     self.gda = GridDataApi()
     self.res = None  # type: List