Beispiel #1
0
def set_source_args(data_set_header, name, description, args,
                    multi_label_data=None):
    """Returns a source arguments dict

    The dict contains the name, description, category and tags of the
    source and a "source_parser" entry holding the header flag plus, when
    the user provided them, the locale and the field separator.

    :param data_set_header: value stored as the "header" of the parser
    :param name: name for the source
    :param description: description for the source
    :param args: object holding the user options; the attributes read are
                 `category`, `tag`, `user_locale`, `training_separator`,
                 `multi_label` and `json_args`
    :param multi_label_data: multi-label information stored under
                             "user_metadata" when `args.multi_label` is set
    :return: dict of source arguments

    """
    source_args = {
        "name": name,
        "description": description,
        "category": args.category,
        "tags": args.tag,
        "source_parser": {"header": data_set_header}}
    # If user has given an OS locale, try to add the locale used in bigml.com
    if args.user_locale is not None:
        source_locale = bigml_locale(args.user_locale)
        if source_locale is None:
            log_message("WARNING: %s locale equivalence not found."
                        " Using %s instead.\n" % (args.user_locale,
                        LOCALE_DEFAULT), log_file=None, console=True)
            source_locale = LOCALE_DEFAULT
        source_args["source_parser"].update({'locale': source_locale})
    # If user has set a training separator, use it.
    if args.training_separator is not None:
        # NOTE(review): str.decode("string_escape") only exists on Python 2;
        # confirm the interpreter this module targets.
        training_separator = args.training_separator.decode("string_escape")
        source_args["source_parser"].update({'separator': training_separator})
    # If uploading a multi-label file, add the user_metadata info needed to
    # manage the multi-label fields
    if args.multi_label and multi_label_data is not None:
        source_args.update(
            {"user_metadata":
                {"multi_label_data": multi_label_data}})
    # User-provided JSON attributes are applied last, so they override the
    # values computed above. `.get` avoids a KeyError when no "source"
    # entry is present in json_args.
    json_source_args = args.json_args.get('source')
    if json_source_args:
        source_args.update(json_source_args)
    return source_args
Beispiel #2
0
def test_source_processing(api, args, resume,
                           name=None, csv_properties=None,
                           session_file=None, path=None, log=None):
    """Creating or retrieving a test data source from input arguments

    """
    test_source = None
    fields = None
    if csv_properties is None:
        csv_properties = {}
    if args.test_set and args.remote:
        # If resuming, try to extract args.source form log files
        if resume:
            message = u.dated("Test source not found. Resuming.\n")
            resume, args.test_source = c.checkpoint(
                c.is_source_created, path, suffix="_test", debug=args.debug,
                message=message, log_file=session_file, console=args.verbosity)

        if not resume:
            source_args = r.set_source_args(args, name=name,
                                            data_set_header=args.test_header)
            test_source = r.create_source(args.test_set, source_args, args,
                                          api, path, session_file, log,
                                          source_type="test")

    # If a source is provided either through the command line or in resume
    # steps, we use it.
    elif args.test_source:
        test_source = bigml.api.get_source_id(args.test_source)

    # If we already have source, we check that is finished, extract the
    # fields, and update them if needed.
    if test_source:
        test_source = r.get_source(test_source, api, args.verbosity,
                                   session_file)
        if 'source_parser' in test_source['object']:
            source_parser = test_source['object']['source_parser']
            if 'missing_tokens' in source_parser:
                csv_properties['missing_tokens'] = (
                    source_parser['missing_tokens'])
            if 'locale' in source_parser:
                csv_properties['data_locale'] = source_parser['locale']
                if (args.user_locale is not None and
                        bigml_locale(args.user_locale) ==
                        source_parser['locale']):
                    args.user_locale = None

        fields = Fields(test_source['object']['fields'], **csv_properties)

        if (args.field_attributes_ or args.types_ or args.user_locale
                or args.json_args.get('source')):
            # avoid updating project_id in source
            project_id, args.project_id = args.project_id, None
            test_source_args = r.set_source_args(args, fields=fields)
            test_source = r.update_source(test_source, test_source_args, args,
                                          api, session_file)
            args.project_id = project_id
            fields = Fields(test_source['object']['fields'], **csv_properties)

    return test_source, resume, csv_properties, fields
Beispiel #3
0
def set_source_args(data_set_header, name, description, args):
    """Builds the dict of arguments used for a source

    The result holds the name, description, category (`args.category`)
    and tags (`args.tag`) plus a "source_parser" dict with the header
    flag and, if `args.user_locale` is set, the locale.

    """
    category = args.category
    tags = args.tag
    parser_options = {"header": data_set_header}

    # Translate the user's OS locale; when no equivalence is found, warn
    # on the console and fall back to LOCALE_DEFAULT
    if args.user_locale is not None:
        locale_name = bigml_locale(args.user_locale)
        if locale_name is None:
            warning = ("WARNING: %s locale equivalence not found."
                       " Using %s instead.\n" %
                       (args.user_locale, LOCALE_DEFAULT))
            log_message(warning, log_file=None, console=True)
            locale_name = LOCALE_DEFAULT
        parser_options['locale'] = locale_name

    return {
        "name": name,
        "description": description,
        "category": category,
        "tags": tags,
        "source_parser": parser_options,
    }
Beispiel #4
0
def source_processing(
    api, args, resume, csv_properties=None, multi_label_data=None, session_file=None, path=None, log=None
):
    """Creating or retrieving a data source from input arguments

    Returns the tuple (source, resume, csv_properties, fields).
    `csv_properties` is filled with the "missing_tokens" and "data_locale"
    found in the source parser, and `args.user_locale` is reset to None
    when it already matches the locale of the source.

    """
    source = None
    fields = None
    # The properties dict is indexed and unpacked below, so a missing one
    # must be replaced by an empty dict (a None would raise TypeError).
    if csv_properties is None:
        csv_properties = {}
    if args.training_set or (hasattr(args, "evaluate") and args.evaluate and args.test_set):
        # If resuming, try to extract args.source from log files

        if resume:
            message = u.dated("Source not found. Resuming.\n")
            resume, args.source = c.checkpoint(
                c.is_source_created,
                path,
                debug=args.debug,
                message=message,
                log_file=session_file,
                console=args.verbosity,
            )

    # If neither a previous source, dataset or model are provided.
    # we create a new one. Also if --evaluate and test data are provided
    # we create a new dataset to test with.
    data_set, data_set_header = r.data_to_source(args)
    if data_set is not None:
        source_args = r.set_source_args(args, multi_label_data=multi_label_data, data_set_header=data_set_header)
        source = r.create_source(data_set, source_args, args, api, path, session_file, log)

    # If a source is provided either through the command line or in resume
    # steps, we use it.
    elif args.source:
        source = bigml.api.get_source_id(args.source)

    # If we already have source, we check that is finished, extract the
    # fields, and update them if needed.
    if source:
        source = r.get_source(source, api, args.verbosity, session_file)
        if "source_parser" in source["object"]:
            source_parser = source["object"]["source_parser"]
            if "missing_tokens" in source_parser:
                csv_properties["missing_tokens"] = source_parser["missing_tokens"]
            if "locale" in source_parser:
                csv_properties["data_locale"] = source_parser["locale"]
                # No changes if user locale is the one in the source.
                if args.user_locale is not None and bigml_locale(args.user_locale) == source_parser["locale"]:
                    args.user_locale = None
        fields = Fields(source["object"]["fields"], **csv_properties)

        # Update the source only when the user asked for changes
        if args.field_attributes_ or args.types_ or args.user_locale or args.json_args.get("source"):
            source_args = r.set_source_args(args, fields=fields)
            source = r.update_source(source, source_args, args, api, session_file)
            fields = Fields(source["object"]["fields"], **csv_properties)

    return source, resume, csv_properties, fields
Beispiel #5
0
def set_source_args(data_set_header,
                    name,
                    description,
                    args,
                    multi_label_data=None):
    """Returns a source arguments dict

    The dict contains the name, description, category and tags of the
    source and a "source_parser" entry holding the header flag plus, when
    the user provided them, the locale and the field separator.

    :param data_set_header: value stored as the "header" of the parser
    :param name: name for the source
    :param description: description for the source
    :param args: object holding the user options; the attributes read are
                 `category`, `tag`, `user_locale`, `training_separator`,
                 `multi_label` and `json_args`
    :param multi_label_data: multi-label information stored under
                             "user_metadata" when `args.multi_label` is set
    :return: dict of source arguments

    """
    source_args = {
        "name": name,
        "description": description,
        "category": args.category,
        "tags": args.tag,
        "source_parser": {
            "header": data_set_header
        }
    }
    # If user has given an OS locale, try to add the locale used in bigml.com
    if args.user_locale is not None:
        source_locale = bigml_locale(args.user_locale)
        if source_locale is None:
            log_message("WARNING: %s locale equivalence not found."
                        " Using %s instead.\n" %
                        (args.user_locale, LOCALE_DEFAULT),
                        log_file=None,
                        console=True)
            source_locale = LOCALE_DEFAULT
        source_args["source_parser"].update({'locale': source_locale})
    # If user has set a training separator, use it.
    if args.training_separator is not None:
        # NOTE(review): str.decode("string_escape") only exists on Python 2;
        # confirm the interpreter this module targets.
        training_separator = args.training_separator.decode("string_escape")
        source_args["source_parser"].update({'separator': training_separator})
    # If uploading a multi-label file, add the user_metadata info needed to
    # manage the multi-label fields
    if args.multi_label and multi_label_data is not None:
        source_args.update(
            {"user_metadata": {
                "multi_label_data": multi_label_data
            }})
    # User-provided JSON attributes are applied last, so they override the
    # values computed above. `.get` avoids a KeyError when no "source"
    # entry is present in json_args.
    json_source_args = args.json_args.get('source')
    if json_source_args:
        source_args.update(json_source_args)
    return source_args
Beispiel #6
0
def set_source_args(data_set_header, name, description, args):
    """Returns the dict of arguments for a source

    Besides name and description, the category and tags are read from
    `args`, and the header flag is stored under "source_parser". The
    parser also gets a "locale" when `args.user_locale` is given.

    """
    source_args = dict(
        name=name,
        description=description,
        category=args.category,
        tags=args.tag,
        source_parser=dict(header=data_set_header))

    # Nothing else to add when the user gave no OS locale
    if args.user_locale is None:
        return source_args

    # Use the bigml.com locale matching the user's one, or the default
    # locale (with a console warning) when there is no equivalence
    parser_locale = bigml_locale(args.user_locale)
    if parser_locale is None:
        text = ("WARNING: %s locale equivalence not found."
                " Using %s instead.\n") % (args.user_locale, LOCALE_DEFAULT)
        log_message(text, log_file=None, console=True)
        parser_locale = LOCALE_DEFAULT
    source_args["source_parser"]['locale'] = parser_locale
    return source_args
Beispiel #7
0
def test_source_processing(api,
                           args,
                           resume,
                           name=None,
                           csv_properties=None,
                           session_file=None,
                           path=None,
                           log=None):
    """Creating or retrieving a test data source from input arguments

    """
    test_source = None
    fields = None
    if csv_properties is None:
        csv_properties = {}
    if args.test_set and args.remote:
        # If resuming, try to extract args.source form log files
        if resume:
            message = u.dated("Test source not found. Resuming.\n")
            resume, args.test_source = c.checkpoint(c.is_source_created,
                                                    path,
                                                    suffix="_test",
                                                    debug=args.debug,
                                                    message=message,
                                                    log_file=session_file,
                                                    console=args.verbosity)

        if not resume:
            source_args = r.set_source_args(args,
                                            name=name,
                                            data_set_header=args.test_header)
            test_source = r.create_source(args.test_set,
                                          source_args,
                                          args,
                                          api,
                                          path,
                                          session_file,
                                          log,
                                          source_type="test")

    # If a source is provided either through the command line or in resume
    # steps, we use it.
    elif args.test_source:
        test_source = bigml.api.get_source_id(args.test_source)

    # If we already have source, we check that is finished, extract the
    # fields, and update them if needed.
    if test_source:
        test_source = r.get_source(test_source, api, args.verbosity,
                                   session_file)
        if 'source_parser' in test_source['object']:
            source_parser = test_source['object']['source_parser']
            if 'missing_tokens' in source_parser:
                csv_properties['missing_tokens'] = (
                    source_parser['missing_tokens'])
            if 'locale' in source_parser:
                csv_properties['data_locale'] = source_parser['locale']
                if (args.user_locale is not None and bigml_locale(
                        args.user_locale) == source_parser['locale']):
                    args.user_locale = None

        fields = Fields(test_source['object']['fields'], **csv_properties)

        if (args.field_attributes_ or args.types_ or args.user_locale
                or args.json_args.get('source')):
            # avoid updating project_id in source
            project_id, args.project_id = args.project_id, None
            test_source_args = r.set_source_args(args, fields=fields)
            test_source = r.update_source(test_source, source_args, args, api,
                                          session_file)
            args.project_id = project_id
            fields = Fields(source['object']['fields'], **csv_properties)

    return test_source, resume, csv_properties, fields
Beispiel #8
0
def source_processing(api,
                      args,
                      resume,
                      csv_properties=None,
                      multi_label_data=None,
                      session_file=None,
                      path=None,
                      log=None):
    """Creating or retrieving a data source from input arguments

    Returns the tuple (source, resume, csv_properties, fields).
    `csv_properties` is filled with the "missing_tokens" and "data_locale"
    found in the source parser, and `args.user_locale` is reset to None
    when it already matches the locale of the source. When a new source
    is created, `args.project_id` is set from `pp.project_processing`.

    """
    source = None
    fields = None
    # The properties dict is indexed and unpacked below, so a missing one
    # must be replaced by an empty dict (a None would raise TypeError).
    if csv_properties is None:
        csv_properties = {}
    if (args.training_set or
        (hasattr(args, "evaluate") and args.evaluate and args.test_set)):
        # If resuming, try to extract args.source from log files

        if resume:
            message = u.dated("Source not found. Resuming.\n")
            resume, args.source = c.checkpoint(c.is_source_created,
                                               path,
                                               debug=args.debug,
                                               message=message,
                                               log_file=session_file,
                                               console=args.verbosity)

    # If neither a previous source, dataset or model are provided.
    # we create a new one. Also if --evaluate and test data are provided
    # we create a new dataset to test with.
    data_set, data_set_header = r.data_to_source(args)
    if data_set is not None:
        # Check if there's a created project for it
        args.project_id = pp.project_processing(api,
                                                args,
                                                resume,
                                                session_file=session_file,
                                                path=path,
                                                log=log)
        source_args = r.set_source_args(args,
                                        multi_label_data=multi_label_data,
                                        data_set_header=data_set_header)
        source = r.create_source(data_set, source_args, args, api, path,
                                 session_file, log)

    # If a source is provided either through the command line or in resume
    # steps, we use it.
    elif args.source:
        source = bigml.api.get_source_id(args.source)

    # If we already have source, we check that is finished, extract the
    # fields, and update them if needed.
    if source:
        source = r.get_source(source, api, args.verbosity, session_file)
        if 'source_parser' in source['object']:
            source_parser = source['object']['source_parser']
            if 'missing_tokens' in source_parser:
                csv_properties['missing_tokens'] = (
                    source_parser['missing_tokens'])
            if 'locale' in source_parser:
                csv_properties['data_locale'] = source_parser['locale']
                # No changes if user locale is the one in the source.
                if (args.user_locale is not None and bigml_locale(
                        args.user_locale) == source_parser['locale']):
                    args.user_locale = None
        fields = Fields(source['object']['fields'], **csv_properties)

        if (args.field_attributes_ or args.types_ or args.user_locale
                or args.json_args.get('source')):
            # avoid updating project_id in source
            project_id, args.project_id = args.project_id, None
            source_args = r.set_source_args(args, fields=fields)
            source = r.update_source(source, source_args, args, api,
                                     session_file)
            args.project_id = project_id
            fields = Fields(source['object']['fields'], **csv_properties)

    return source, resume, csv_properties, fields
Beispiel #9
0
def set_source_args(args,
                    name=None,
                    multi_label_data=None,
                    data_set_header=None,
                    fields=None):
    """Returns a source arguments dict

    Starts from the dict returned by `set_basic_args(args, name)` and adds
    the project, the parser options (header, locale, separator), the
    multi-label metadata and, when a `fields` structure is given, the
    field-level updates.

    :param args: object holding the user options
    :param name: name for the source; defaults to `args.name`
    :param multi_label_data: multi-label information stored under
                             "user_metadata" when `args.multi_label` is set
    :param data_set_header: value stored as the "header" of the parser;
                            not set when None
    :param fields: previous fields structure, needed to update field
                   attributes or types
    :return: dict of source arguments

    """

    if name is None:
        name = args.name
    source_args = set_basic_args(args, name)
    if args.project_id is not None:
        source_args.update({"project": args.project_id})
    # if header is set, use it
    if data_set_header is not None:
        source_args.update({"source_parser": {"header": data_set_header}})
    # If user has given an OS locale, try to add the locale used in bigml.com
    if args.user_locale is not None:
        source_locale = bigml_locale(args.user_locale)
        if source_locale is None:
            log_message("WARNING: %s locale equivalence not found."
                        " Using %s instead.\n" %
                        (args.user_locale, LOCALE_DEFAULT),
                        log_file=None,
                        console=True)
            source_locale = LOCALE_DEFAULT
        # Merge into the existing parser options: replacing
        # "source_parser" with an empty dict here would drop the header
        # that was set above.
        source_args.setdefault("source_parser", {}).update(
            {'locale': source_locale})
    # If user has set a training separator, use it.
    if args.training_separator is not None:
        training_separator = decode2(args.training_separator,
                                     encoding="string_escape")
        # "source_parser" may not exist yet when neither a header nor a
        # locale were given, so create it on demand.
        source_args.setdefault("source_parser", {}).update(
            {'separator': training_separator})
    # If uploading a multi-label file, add the user_metadata info needed to
    # manage the multi-label fields
    if (hasattr(args, 'multi_label') and args.multi_label
            and multi_label_data is not None):
        source_args.update(
            {"user_metadata": {
                "multi_label_data": multi_label_data
            }})

    # to update fields attributes or types you must have a previous fields
    # structure (at update time)
    if fields:
        if args.field_attributes_:
            update_attributes(source_args, {"fields": args.field_attributes_},
                              by_column=True,
                              fields=fields)
        if args.types_:
            update_attributes(source_args, {"fields": args.types_},
                              by_column=True,
                              fields=fields)
        if args.import_fields:
            fields_struct = fields.new_fields_structure(args.import_fields)
            check_fields_struct(fields_struct, "source")
            update_attributes(source_args, fields_struct)
        if 'source' in args.json_args:
            update_json_args(source_args, args.json_args.get('source'), fields)
    return source_args