def set_verbose(self, value):
    """Set the verbose flag and propagate it to known databases and datasets.

    Args:
        value: the new verbose state; must be a ``bool``.

    Raises:
        biomart.BiomartException: if ``value`` is not a ``bool``.
    """
    if not isinstance(value, bool):
        raise biomart.BiomartException("verbose must be set to a boolean value")

    setattr(self, '_verbose', value)

    # Propagate the *requested* state to the child objects. Assigning a
    # literal True here would make it impossible to switch verbosity off
    # again on databases/datasets that are already registered.
    for database in self._databases.values():
        database.verbose = value

    for dataset in self._datasets.values():
        dataset.verbose = value
def __init__(self, *args, **kwargs):
    """Initialise the database from keyword options.

    Recognised keyword arguments:
        server: an existing server object to attach to. When it is missing
            or falsy, a ``biomart.BiomartServer`` is created with
            ``url=args[0]`` and every keyword argument forwarded to it.
        name: required; a missing or falsy name raises.
        display_name: defaults to ``name``.
        virtual_schema: defaults to ``'default'``.
        verbose: defaults to ``False``.

    Raises:
        biomart.BiomartException: if no ``name`` is given.
    """
    # Resolve the server first: reuse the supplied one, or build a new one
    # from the first positional argument.
    server = kwargs.get('server')
    if not server:
        server = biomart.BiomartServer(url=args[0], **kwargs)
    self.server = server

    # The name is stored before it is validated.
    self.name = kwargs.get('name')
    if not self.name:
        raise biomart.BiomartException(
            "[BiomartDatabase] 'name' is required")

    self.display_name = kwargs.get('display_name', self.name)
    self.virtual_schema = kwargs.get('virtual_schema', 'default')
    self.verbose = kwargs.get('verbose', False)

    # Starts out empty; nothing is populated by the constructor.
    self._datasets = {}
def __init__(self, *args, **kwargs):
    """Initialise the dataset from keyword options.

    Recognised keyword arguments:
        name: required; a missing or falsy name raises.
        display_name: defaults to ``name``.
        interface: defaults to ``'default'``.
        verbose: defaults to ``False``.
        server: an existing server object to attach to. When it is missing
            or falsy, a ``biomart.BiomartServer`` is created with
            ``url=args[0]`` and every keyword argument forwarded to it.
        database: stored as given; ``None`` when not supplied.

    Raises:
        biomart.BiomartException: if no ``name`` is given.
    """
    # Dataset-specific attributes. The name is stored before validation.
    self.name = kwargs.get('name')
    if not self.name:
        raise biomart.BiomartException("[BiomartDataset] 'name' is required")

    self.display_name = kwargs.get('display_name', self.name)
    self.interface = kwargs.get('interface', 'default')
    self.verbose = kwargs.get('verbose', False)

    # Related server: reuse the supplied one, or build a new one from the
    # first positional argument.
    server = kwargs.get('server')
    if not server:
        server = biomart.BiomartServer(url=args[0], **kwargs)
    self.server = server

    # Related database, if the caller provided one.
    self.database = kwargs.get('database')

    # Both start out empty; nothing is populated by the constructor.
    self._filters = {}
    self._attribute_pages = {}
def search(self, params={}, header=0, count=False, formatter='TSV'):
    """Validate a query description, build the XML query and send it.

    Args:
        params: dict with the optional keys ``'filters'`` (mapping of filter
            name to value) and ``'attributes'`` (list of attribute names).
            The shared default dict is only read, never mutated.
        header: converted with ``str()`` and written to the query's
            ``header`` attribute.
        count: when truthy, attribute discovery/validation is skipped and no
            ``Attribute`` elements are emitted; the query's ``count``
            attribute is ``'1'`` only when ``count is True``.
        formatter: written to the query's ``formatter`` attribute.

    Returns:
        Whatever ``self.server.get_request(query=...)`` returns.

    Raises:
        biomart.BiomartException: if ``params`` is not a dict, a filter or
            attribute is unknown, a filter value is not among the filter's
            accepted values, no attribute can be determined, or the
            attributes do not all belong to a single attribute page.
    """
    if not isinstance(params, dict):
        raise biomart.BiomartException("'params' argument must be a dict")

    if self.verbose:
        print("[BiomartDataset:'%s'] Searching using following params:" % self.name)
        pprint.pprint(params)

    # read filters and attributes from params
    filters = params.get('filters', {})
    attributes = params.get('attributes', [])

    # check filters
    for filter_name, filter_value in filters.items():
        dataset_filter = self.filters.get(filter_name, None)

        if not dataset_filter:
            if self.verbose:
                self.show_filters()
            raise biomart.BiomartException("The filter '%s' does not exist." % filter_name)

        accepted_values = dataset_filter.accepted_values
        if len(accepted_values) > 0:
            incorrect_value = None

            if isinstance(filter_value, (list, tuple)) and dataset_filter.filter_type == 'list':
                # Materialise the rejected values: a lazy filter() object is
                # always truthy, which would reject every list value.
                incorrect_value = [v for v in filter_value if v not in accepted_values]
            elif filter_value not in accepted_values:
                incorrect_value = filter_value

            if incorrect_value:
                error_msg = "the value(s) '%s' for filter '%s' cannot be used." \
                    % (incorrect_value, filter_name)
                error_msg += " Use values from: [%s]" % ", ".join(map(str, accepted_values))
                raise biomart.BiomartException(error_msg)

    # check attributes unless we're only counting
    if not count:
        # discover attributes and pages
        self.fetch_attributes()

        # no attributes given, use default attributes
        if not attributes and self._attribute_pages:
            # get default attribute page; None when no page is flagged as
            # default, so the "none given" error below is raised instead of
            # leaking a bare StopIteration.
            page = next(
                (p for p in self._attribute_pages.values() if p.is_default),
                None,
            )
            if page is not None:
                # get default attributes from page
                attributes = [a.name for a in page.attributes.values() if a.is_default]

                # there is no default attributes, get all attributes from page
                if not attributes:
                    attributes = [a.name for a in page.attributes.values()]

        # if no default attributes have been defined, raise an exception
        if not attributes:
            raise biomart.BiomartException("at least one attribute is required, none given")

        for attribute_name in attributes:
            found = any(
                attribute_name in page.attributes
                for page in self._attribute_pages.values()
            )
            if not found:
                if self.verbose:
                    self.show_attributes()
                raise biomart.BiomartException("The attribute '%s' does not exist." % attribute_name)

        # guess the attribute page and check if all attributes belong to it.
        requested = set(attributes)
        guessed_page = None
        for tested_page in self._attribute_pages.values():
            if requested.issubset(tested_page.attributes.keys()):
                guessed_page = tested_page
                break

        if guessed_page is None:
            # selected attributes must belong to the same attribute page.
            if self.verbose:
                self.show_attributes()
            raise biomart.BiomartException("You must use attributes that belong to the same attribute page.")

    # filters and attributes looks ok, start building the XML query
    root = Element('Query')
    root.attrib.update({
        'virtualSchemaName': self.database.virtual_schema,
        # honour the caller's formatter instead of a hard-coded 'TSV'
        'formatter': str(formatter),
        'header': str(header),
        'uniqueRows': '1',
        'datasetConfigVersion': '0.6',
        'count': '1' if count is True else '',
    })

    dataset = SubElement(root, "Dataset")
    dataset.attrib.update({
        'name': self.name,
        'interface': self.interface,
    })

    # Add filters to the XML query
    for filter_name, filter_value in filters.items():
        dataset_filter = self.filters[filter_name]

        filter_elem = SubElement(dataset, "Filter")
        filter_elem.set('name', filter_name)

        if 'boolean_list' == dataset_filter.filter_type:
            # Handle real booleans before touching .lower(): calling it on
            # False raises AttributeError.
            if isinstance(filter_value, bool):
                filter_elem.set('excluded', '0' if filter_value else '1')
            else:
                lowered = filter_value.lower()
                if lowered in ('included', 'only'):
                    filter_elem.set('excluded', '0')
                elif lowered == 'excluded':
                    filter_elem.set('excluded', '1')
        else:
            if isinstance(filter_value, (list, tuple)):
                filter_value = ",".join(map(str, filter_value))
            filter_elem.set('value', str(filter_value))

    # Add attributes to the XML query, unless we're only counting
    if not count:
        for attribute_name in attributes:
            attribute_elem = SubElement(dataset, "Attribute")
            attribute_elem.set('name', str(attribute_name))

    # Serialise once and reuse for both the debug output and the request.
    query = tostring(root)

    if self.verbose:
        print("[BiomartDataset] search query:\n%s" % query)

    return self.server.get_request(query=query)
def search(self, params = {}, header = 0, count = False):
    """Validate a query description, build the XML query and send it.

    Args:
        params: dict with the optional keys ``'filters'`` (mapping of filter
            name to value) and ``'attributes'`` (list of attribute names).
            The shared default dict is only read, never mutated.
        header: converted with ``str()`` and written to the query's
            ``header`` attribute.
        count: when truthy, attribute validation is skipped and no
            ``Attribute`` elements are emitted; the query's ``count``
            attribute is ``'1'`` only when ``count is True``.

    Returns:
        Whatever ``self.server.get_request(query=...)`` returns.

    Raises:
        biomart.BiomartException: if ``params`` is not a dict, a filter or
            attribute is unknown, a filter value is not among the filter's
            accepted values, no attribute can be determined, or the
            attributes span more than one attribute page.
    """
    if not isinstance(params, dict):
        raise biomart.BiomartException("'params' argument must be a dict")

    if self.verbose:
        print("[BiomartDataset:'%s'] Searching using following params:" % self.name)
        pprint.pprint(params)

    # read filters and attributes from params
    filters = params.get('filters', {})
    attributes = params.get('attributes', [])

    # check filters
    for filter_name, filter_value in filters.items():
        dataset_filter = self.filters.get(filter_name, None)

        if not dataset_filter:
            self.show_filters()
            raise biomart.BiomartException("The filter '%s' does not exist." % filter_name)

        accepted_values = dataset_filter.accepted_values
        if len(accepted_values) > 0:
            if isinstance(filter_value, (list, tuple)) and dataset_filter.filter_type == 'list':
                # The query builder below joins list values with commas, so
                # validate each element instead of rejecting the list itself.
                rejected = [v for v in filter_value if v not in accepted_values]
                is_invalid = bool(rejected)
            else:
                rejected = filter_value
                is_invalid = filter_value not in accepted_values

            if is_invalid:
                error_msg = "The value '%s' for filter '%s' cannot be used." % (rejected, filter_name)
                error_msg += " Use one of: [%s]" % ", ".join(map(str, accepted_values))
                raise biomart.BiomartException(error_msg)

    # check attributes unless we're only counting
    if not count:
        # no attributes given, use default attributes
        if not attributes:
            attributes = [a.name for a in self.attributes.values() if a.is_default]

        # if no default attributes have been defined, raise an exception
        if not attributes:
            raise biomart.BiomartException("at least one attribute is required, none given")

        for attribute_name in attributes:
            if attribute_name not in self.attributes:
                self.show_attributes()
                raise biomart.BiomartException("The attribute '%s' does not exist." % attribute_name)

        # selected attributes must belong to the same attribute page.
        pages = {self.attributes[a].attribute_page for a in attributes}
        if len(pages) > 1:
            self.show_attributes()
            raise biomart.BiomartException("You must use attributes that belong to the same attribute page.")

    # filters and attributes looks ok, start building the XML query
    root = Element('Query')
    root.attrib.update({
        'virtualSchemaName': 'default',  # TODO: use database virtualSchemaName instead (if any error)
        'formatter': 'TSV',
        'header': str(header),
        'uniqueRows': '1',
        'datasetConfigVersion': '0.6',
        'count': '1' if count is True else '',
    })

    dataset = SubElement(root, "Dataset")
    dataset.attrib.update({
        'name': self.name,
        'interface': self.interface,
    })

    # Add filters to the XML query
    for filter_name, filter_value in filters.items():
        dataset_filter = self.filters[filter_name]

        filter_elem = SubElement(dataset, "Filter")
        filter_elem.set('name', filter_name)

        if 'boolean_list' == dataset_filter.filter_type:
            # Handle real booleans before touching .lower(): calling it on
            # False raises AttributeError.
            if isinstance(filter_value, bool):
                filter_elem.set('excluded', '0' if filter_value else '1')
            else:
                lowered = filter_value.lower()
                if lowered in ('included', 'only'):
                    filter_elem.set('excluded', '0')
                elif lowered == 'excluded':
                    filter_elem.set('excluded', '1')
        else:
            if isinstance(filter_value, (list, tuple)):
                filter_value = ",".join(map(str, filter_value))
            filter_elem.set('value', str(filter_value))

    # Add attributes to the XML query, unless we're only counting
    if not count:
        for attribute_name in attributes:
            attribute_elem = SubElement(dataset, "Attribute")
            attribute_elem.set('name', str(attribute_name))

    # Serialise once and reuse for both the debug output and the request.
    query = tostring(root)

    if self.verbose:
        print("[BiomartDataset] search query:\n%s" % query)

    return self.server.get_request(query = query)