def test_from_xml_to_xml(self):
    """Check that flows survive an XML -> OpenMLFlow -> XML round trip.

    For every listed flow id the XML returned by the API call is parsed
    into an ``OpenMLFlow`` and serialised again with ``_to_xml()``. Both
    documents are normalised the same way and must then compare equal.
    """
    def _normalise(xml):
        # Drop spaces and tabs, trim the ends, collapse doubled newlines and
        # decode the quote entity so only content differences remain.
        xml = xml.replace(' ', '').replace('\t', '').strip()
        xml = xml.replace('\n\n', '\n').replace('&quot;', '"')
        # Replacing a zero-width match with '' leaves the text unchanged;
        # the step is kept so the normalisation pipeline stays as it was.
        return re.sub(r'^$', '', xml)

    # Get the raw xml thing
    # TODO maybe get this via get_flow(), which would have to be refactored
    # to allow getting only the xml dictionary
    # TODO: no sklearn flows.
    for flow_id in [3, 5, 7, 9]:
        flow_xml = _perform_api_call("flow/%d" % flow_id)
        flow_dict = xmltodict.parse(flow_xml)
        flow = openml.OpenMLFlow._from_dict(flow_dict)
        new_xml = flow._to_xml()

        self.assertEqual(_normalise(new_xml), _normalise(flow_xml))
def _list_flows(api_call):
    """Fetch a flow listing and index it by flow id.

    Parameters
    ----------
    api_call : str
        API call that is handed unchanged to ``_perform_api_call``.

    Returns
    -------
    dict
        Maps each integer flow id to a dict with the keys ``id``,
        ``full_name``, ``name``, ``version``, ``external_version`` and
        ``uploader``. Apart from ``id`` the values are kept as parsed.

    Raises
    ------
    AssertionError
        If the parsed response does not hold a list of flows or does not
        declare the expected ``oml`` namespace.
    """
    # TODO add proper error handling here!
    xml_string = _perform_api_call(api_call)
    # force_list keeps 'oml:flow' a list even when only one flow is returned.
    flows_dict = xmltodict.parse(xml_string, force_list=('oml:flow', ))
    listing = flows_dict['oml:flows']

    # Minimalistic check if the XML is useful. The failure message reports
    # the type of the object that was actually checked.
    assert isinstance(listing['oml:flow'], list), type(listing['oml:flow'])
    assert listing['@xmlns:oml'] == 'http://openml.org/openml', \
        listing['@xmlns:oml']

    flows = dict()
    for flow_ in listing['oml:flow']:
        fid = int(flow_['oml:id'])
        flows[fid] = {
            'id': fid,
            'full_name': flow_['oml:full_name'],
            'name': flow_['oml:name'],
            'version': flow_['oml:version'],
            'external_version': flow_['oml:external_version'],
            'uploader': flow_['oml:uploader'],
        }

    return flows
def test_from_xml_to_xml(self):
    """Check that flows survive an XML -> OpenMLFlow -> XML round trip.

    Each listed flow is downloaded with a GET request, parsed into an
    ``OpenMLFlow`` and serialised again with ``_to_xml()``. The downloaded
    and the regenerated XML are normalised identically and must be equal.
    """
    def _normalise(xml):
        # Drop spaces and tabs, trim the ends, collapse doubled newlines and
        # decode the quote entity so only content differences remain.
        xml = xml.replace(" ", "").replace("\t", "").strip()
        xml = xml.replace("\n\n", "\n").replace('&quot;', '"')
        # Replacing a zero-width match with "" leaves the text unchanged;
        # the step is kept so the normalisation pipeline stays as it was.
        return re.sub(r"^$", "", xml)

    # Get the raw xml thing
    # TODO maybe get this via get_flow(), which would have to be refactored
    # to allow getting only the xml dictionary
    # TODO: no sklearn flows.
    for flow_id in [3, 5, 7, 9]:
        flow_xml = _perform_api_call("flow/%d" % flow_id, request_method="get")
        flow_dict = xmltodict.parse(flow_xml)
        flow = openml.OpenMLFlow._from_dict(flow_dict)
        new_xml = flow._to_xml()

        self.assertEqual(_normalise(new_xml), _normalise(flow_xml))
def flow_exists(name, external_version):
    """Retrieve the id of the flow identified by name + external_version.

    Parameters
    ----------
    name : string
        Name of the flow.
    external_version : string
        Version information associated with the flow.

    Returns
    -------
    flow_exist : int or bool
        The flow id if the response carries a positive id, False otherwise.

    Raises
    ------
    ValueError
        If ``name`` or ``external_version`` is not a non-empty string.

    Notes
    -----
    see http://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version
    """
    if not (isinstance(name, six.string_types) and len(name) > 0):
        raise ValueError('Argument \'name\' should be a non-empty string')
    # Type-check external_version itself, so a non-string value is rejected
    # with ValueError instead of failing inside len().
    if not (isinstance(external_version, six.string_types) and
            len(external_version) > 0):
        raise ValueError('Argument \'version\' should be a non-empty string')

    xml_response = _perform_api_call(
        "flow/exists",
        data={'name': name, 'external_version': external_version},
    )

    result_dict = xmltodict.parse(xml_response)
    flow_id = int(result_dict['oml:flow_exists']['oml:id'])

    # Only a positive id is returned as a match; anything else yields False.
    if flow_id > 0:
        return flow_id
    return False
def test_from_xml_to_xml(self):
    """Verify that serialising a parsed flow reproduces the source XML.

    The XML of each listed flow is fetched, converted to an ``OpenMLFlow``
    via ``_from_dict`` and written back with ``_to_xml()``. After the same
    whitespace/entity normalisation both strings must be identical.
    """
    def _normalise(xml):
        # Whitespace and the quote entity are not significant for the
        # comparison, so strip/unify them on both sides.
        stripped = xml.replace(' ', '').replace('\t', '').strip()
        unified = stripped.replace('\n\n', '\n').replace('&quot;', '"')
        # Replacing a zero-width match with '' leaves the text unchanged;
        # the step is kept so the normalisation pipeline stays as it was.
        return re.sub(r'^$', '', unified)

    # Get the raw xml thing
    # TODO maybe get this via get_flow(), which would have to be refactored
    # to allow getting only the xml dictionary
    # TODO: no sklearn flows.
    for flow_id in [3, 5, 7, 9]:
        flow_xml = _perform_api_call("flow/%d" % flow_id)
        flow = openml.OpenMLFlow._from_dict(xmltodict.parse(flow_xml))
        new_xml = flow._to_xml()

        self.assertEqual(_normalise(new_xml), _normalise(flow_xml))
def get_flow(flow_id):
    """Download the OpenML flow for a given flow ID.

    Parameters
    ----------
    flow_id : int
        The OpenML flow id. Anything ``int()`` can convert is accepted.

    Returns
    -------
    OpenMLFlow
        The flow built by ``OpenMLFlow._from_dict`` from the parsed XML.

    Raises
    ------
    ValueError
        If ``flow_id`` cannot be converted to an int.
    """
    # TODO add caching here!
    try:
        flow_id = int(flow_id)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are translated; KeyboardInterrupt,
        # SystemExit and unrelated errors propagate unchanged.
        raise ValueError("Flow ID must be an int, got %s." % str(flow_id))

    flow_xml = _perform_api_call("flow/%d" % flow_id)

    flow_dict = xmltodict.parse(flow_xml)
    flow = OpenMLFlow._from_dict(flow_dict)

    return flow
def get_flow(flow_id):
    """Download the OpenML flow for a given flow ID.

    Parameters
    ----------
    flow_id : int
        The OpenML flow id. Anything ``int()`` can convert is accepted.

    Returns
    -------
    OpenMLFlow
        The flow built by ``OpenMLFlow._from_dict`` from the parsed XML.

    Raises
    ------
    ValueError
        If ``flow_id`` cannot be converted to an int.
    """
    # TODO add caching here!
    try:
        flow_id = int(flow_id)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are translated; KeyboardInterrupt,
        # SystemExit and unrelated errors propagate unchanged.
        raise ValueError("Flow ID must be an int, got %s." % str(flow_id))

    flow_xml = _perform_api_call("flow/%d" % flow_id)

    flow_dict = xmltodict.parse(flow_xml)
    flow = OpenMLFlow._from_dict(flow_dict)

    return flow
def _list_flows(api_call):
    """Fetch a flow listing and index it by flow id.

    Parameters
    ----------
    api_call : str
        API call that is handed unchanged to ``_perform_api_call``.

    Returns
    -------
    dict
        Maps each integer flow id to a dict with the keys ``id``,
        ``full_name``, ``name``, ``version``, ``external_version`` and
        ``uploader``. Apart from ``id`` the values are kept as parsed.

    Raises
    ------
    AssertionError
        If the parsed response does not hold a list of flows or does not
        declare the expected ``oml`` namespace.
    """
    # TODO add proper error handling here!
    xml_string = _perform_api_call(api_call)
    # Without force_list xmltodict yields a single dict (not a list) when the
    # listing contains exactly one flow, which would trip the check below.
    flows_dict = xmltodict.parse(xml_string, force_list=('oml:flow', ))
    listing = flows_dict['oml:flows']

    # Minimalistic check if the XML is useful. The failure message reports
    # the type of the object that was actually checked.
    assert isinstance(listing['oml:flow'], list), type(listing['oml:flow'])
    assert listing['@xmlns:oml'] == 'http://openml.org/openml', \
        listing['@xmlns:oml']

    flows = dict()
    for flow_ in listing['oml:flow']:
        fid = int(flow_['oml:id'])
        flow = {'id': fid,
                'full_name': flow_['oml:full_name'],
                'name': flow_['oml:name'],
                'version': flow_['oml:version'],
                'external_version': flow_['oml:external_version'],
                'uploader': flow_['oml:uploader']}
        flows[fid] = flow

    return flows