Ejemplo n.º 1
0
    def test_from_xml_to_xml(self):
        """Check that flow XML survives an XML -> OpenMLFlow -> XML round trip.

        For every flow id in the list, the XML returned by
        ``_perform_api_call`` is parsed into a dictionary, converted to an
        ``OpenMLFlow`` and serialized again. Both XML strings are normalized
        identically and must then compare equal.
        """
        # TODO maybe get this via get_flow(), which would have to be refactored
        # to allow getting only the xml dictionary
        # TODO: no sklearn flows.

        def _normalize(xml):
            # Strip indentation (double spaces, tabs) and surrounding
            # whitespace, collapse doubled newlines, and decode the ``&quot;``
            # entity so that quoting differences do not fail the comparison.
            xml = xml.replace('  ', '').replace('\t', '').strip()
            xml = xml.replace('\n\n', '\n').replace('&quot;', '"')
            # NOTE(review): without re.MULTILINE this substitution replaces a
            # zero-width match with '' and leaves the text unchanged; it was
            # presumably meant to drop blank lines -- confirm before removing.
            return re.sub(r'^$', '', xml)

        for flow_id in [3, 5, 7, 9]:
            flow_xml = _perform_api_call("flow/%d" % flow_id)
            flow_dict = xmltodict.parse(flow_xml)

            flow = openml.OpenMLFlow._from_dict(flow_dict)
            new_xml = flow._to_xml()

            self.assertEqual(_normalize(new_xml), _normalize(flow_xml))
Ejemplo n.º 2
0
def _list_flows(api_call):
    """Run a flow-listing API call and index the returned flows by id.

    Parameters
    ----------
    api_call : object
        Forwarded unchanged to ``_perform_api_call`` (presumably the API
        path string -- confirm against callers).

    Returns
    -------
    flows : dict
        Maps each integer flow id to a dict with the keys ``id``,
        ``full_name``, ``name``, ``version``, ``external_version`` and
        ``uploader``.

    Raises
    ------
    AssertionError
        If the parsed ``oml:flow`` entry is not a list or the ``oml``
        namespace is not ``http://openml.org/openml``.
    """
    # TODO add proper error handling here!
    xml_string = _perform_api_call(api_call)
    # force_list keeps 'oml:flow' a list even when only one flow is returned.
    flows_dict = xmltodict.parse(xml_string, force_list=('oml:flow', ))
    listing = flows_dict['oml:flows']

    # Minimalistic check if the XML is useful; each message reports the
    # value that was actually checked.
    assert isinstance(listing['oml:flow'], list), type(listing['oml:flow'])
    assert listing['@xmlns:oml'] == 'http://openml.org/openml', \
        listing['@xmlns:oml']

    flows = dict()
    for flow_ in listing['oml:flow']:
        fid = int(flow_['oml:id'])
        flows[fid] = {
            'id': fid,
            'full_name': flow_['oml:full_name'],
            'name': flow_['oml:name'],
            'version': flow_['oml:version'],
            'external_version': flow_['oml:external_version'],
            'uploader': flow_['oml:uploader'],
        }

    return flows
Ejemplo n.º 3
0
    def test_from_xml_to_xml(self):
        """Check that flow XML survives an XML -> OpenMLFlow -> XML round trip.

        For every flow id in the list, the XML returned by
        ``_perform_api_call`` is parsed into a dictionary, converted to an
        ``OpenMLFlow`` and serialized again. Both XML strings are normalized
        identically and must then compare equal.
        """
        # TODO maybe get this via get_flow(), which would have to be refactored
        # to allow getting only the xml dictionary
        # TODO: no sklearn flows.

        def _normalize(xml):
            # Strip indentation (double spaces, tabs) and surrounding
            # whitespace, collapse doubled newlines, and decode the ``&quot;``
            # entity so that quoting differences do not fail the comparison.
            xml = xml.replace("  ", "").replace("\t", "").strip()
            xml = xml.replace("\n\n", "\n").replace("&quot;", '"')
            # NOTE(review): without re.MULTILINE this substitution replaces a
            # zero-width match with "" and leaves the text unchanged; it was
            # presumably meant to drop blank lines -- confirm before removing.
            return re.sub(r"^$", "", xml)

        for flow_id in [3, 5, 7, 9]:
            flow_xml = _perform_api_call("flow/%d" % flow_id,
                                         request_method="get")
            flow_dict = xmltodict.parse(flow_xml)

            flow = openml.OpenMLFlow._from_dict(flow_dict)
            new_xml = flow._to_xml()

            self.assertEqual(_normalize(new_xml), _normalize(flow_xml))
Ejemplo n.º 4
0
def flow_exists(name, external_version):
    """Retrieves the flow id of the flow uniquely identified by name + external_version.

    Parameters
    ----------
    name : string
        Name of the flow
    external_version : string
        Version information associated with flow.

    Returns
    -------
    flow_exist : int
        flow id iff exists, False otherwise

    Raises
    ------
    ValueError
        If ``name`` or ``external_version`` is not a non-empty string.

    Notes
    -----
    see http://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version
    """
    if not (isinstance(name, six.string_types) and len(name) > 0):
        raise ValueError('Argument \'name\' should be a non-empty string')
    # Validate external_version itself: checking ``name`` a second time let a
    # non-string external_version reach len() and fail with a TypeError.
    if not (isinstance(external_version, six.string_types)
            and len(external_version) > 0):
        raise ValueError(
            'Argument \'external_version\' should be a non-empty string')

    xml_response = _perform_api_call(
        "flow/exists",
        data={'name': name, 'external_version': external_version})

    result_dict = xmltodict.parse(xml_response)
    flow_id = int(result_dict['oml:flow_exists']['oml:id'])
    # Only ids greater than zero are returned; anything else is reported as
    # False.
    return flow_id if flow_id > 0 else False
Ejemplo n.º 5
0
    def test_from_xml_to_xml(self):
        """Check that flow XML survives an XML -> OpenMLFlow -> XML round trip.

        For every flow id in the list, the XML returned by
        ``_perform_api_call`` is parsed into a dictionary, converted to an
        ``OpenMLFlow`` and serialized again. Both XML strings are normalized
        identically and must then compare equal.
        """
        # TODO maybe get this via get_flow(), which would have to be refactored
        # to allow getting only the xml dictionary
        # TODO: no sklearn flows.

        def _normalize(xml):
            # Strip indentation (double spaces, tabs) and surrounding
            # whitespace, collapse doubled newlines, and decode the ``&quot;``
            # entity so that quoting differences do not fail the comparison.
            stripped = xml.replace('  ', '').replace('\t', '').strip()
            unescaped = stripped.replace('\n\n', '\n').replace('&quot;', '"')
            # NOTE(review): without re.MULTILINE this substitution replaces a
            # zero-width match with '' and leaves the text unchanged; it was
            # presumably meant to drop blank lines -- confirm before removing.
            return re.sub(r'^$', '', unescaped)

        for flow_id in [3, 5, 7, 9, ]:
            flow_xml = _perform_api_call("flow/%d" % flow_id)
            flow_dict = xmltodict.parse(flow_xml)

            flow = openml.OpenMLFlow._from_dict(flow_dict)
            new_xml = flow._to_xml()

            self.assertEqual(_normalize(new_xml), _normalize(flow_xml))
Ejemplo n.º 6
0
def get_flow(flow_id):
    """Download the OpenML flow for a given flow ID.

    Parameters
    ----------
    flow_id : int
        The OpenML flow id. Any value accepted by ``int()`` is converted.

    Returns
    -------
    flow : OpenMLFlow
        The object built by ``OpenMLFlow._from_dict`` from the parsed XML.

    Raises
    ------
    ValueError
        If ``flow_id`` cannot be converted to an int.
    """
    # TODO add caching here!
    try:
        flow_id = int(flow_id)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are translated; a bare ``except`` would
        # also swallow KeyboardInterrupt and SystemExit.
        raise ValueError("Flow ID must be an int, got %s." % str(flow_id))

    flow_xml = _perform_api_call("flow/%d" % flow_id)

    flow_dict = xmltodict.parse(flow_xml)
    flow = OpenMLFlow._from_dict(flow_dict)

    return flow
Ejemplo n.º 7
0
def get_flow(flow_id):
    """Download the OpenML flow for a given flow ID.

    Parameters
    ----------
    flow_id : int
        The OpenML flow id. Any value accepted by ``int()`` is converted.

    Returns
    -------
    flow : OpenMLFlow
        The object built by ``OpenMLFlow._from_dict`` from the parsed XML.

    Raises
    ------
    ValueError
        If ``flow_id`` cannot be converted to an int.
    """
    # TODO add caching here!
    try:
        flow_id = int(flow_id)
    except (TypeError, ValueError, OverflowError):
        # Catch just the errors int() raises for bad input, so interpreter
        # signals such as KeyboardInterrupt are no longer masked.
        raise ValueError("Flow ID must be an int, got %s." % str(flow_id))

    flow_xml = _perform_api_call("flow/%d" % flow_id)

    flow_dict = xmltodict.parse(flow_xml)
    flow = OpenMLFlow._from_dict(flow_dict)

    return flow
Ejemplo n.º 8
0
def _list_flows(api_call):
    """Run a flow-listing API call and index the returned flows by id.

    Parameters
    ----------
    api_call : object
        Forwarded unchanged to ``_perform_api_call`` (presumably the API
        path string -- confirm against callers).

    Returns
    -------
    flows : dict
        Maps each integer flow id to a dict with the keys ``id``,
        ``full_name``, ``name``, ``version``, ``external_version`` and
        ``uploader``.

    Raises
    ------
    AssertionError
        If the parsed ``oml:flow`` entry is not a list or the ``oml``
        namespace is not ``http://openml.org/openml``.
    """
    # TODO add proper error handling here!
    xml_string = _perform_api_call(api_call)
    # Without force_list, xmltodict turns a single <oml:flow> element into a
    # dict rather than a one-element list, which would fail the check below.
    flows_dict = xmltodict.parse(xml_string, force_list=('oml:flow', ))
    listing = flows_dict['oml:flows']

    # Minimalistic check if the XML is useful; each message reports the
    # value that was actually checked.
    assert isinstance(listing['oml:flow'], list), type(listing['oml:flow'])
    assert listing['@xmlns:oml'] == 'http://openml.org/openml', \
        listing['@xmlns:oml']

    flows = dict()
    for flow_ in listing['oml:flow']:
        fid = int(flow_['oml:id'])
        flows[fid] = {'id': fid,
                      'full_name': flow_['oml:full_name'],
                      'name': flow_['oml:name'],
                      'version': flow_['oml:version'],
                      'external_version': flow_['oml:external_version'],
                      'uploader': flow_['oml:uploader']}

    return flows