def set_source_args(data_set_header, name, description, args,
                    multi_label_data=None):
    """Build the arguments dict used to create a source.

    Reads category, tags, locale, separator, multi-label and json-args
    settings from the parsed command-line ``args`` namespace.
    """
    parser_settings = {"header": data_set_header}
    source_args = {
        "name": name,
        "description": description,
        "category": args.category,
        "tags": args.tag,
        "source_parser": parser_settings,
    }
    # Map the user's OS locale onto the locale used in bigml.com, warning
    # and falling back to the default when no equivalence is found.
    if args.user_locale is not None:
        resolved_locale = bigml_locale(args.user_locale)
        if resolved_locale is None:
            log_message("WARNING: %s locale equivalence not found."
                        " Using %s instead.\n" % (args.user_locale,
                                                  LOCALE_DEFAULT),
                        log_file=None, console=True)
            resolved_locale = LOCALE_DEFAULT
        parser_settings['locale'] = resolved_locale
    # Honour a user-supplied training separator (escape sequences allowed).
    if args.training_separator is not None:
        parser_settings['separator'] = \
            args.training_separator.decode("string_escape")
    # Multi-label uploads store their label bookkeeping as user metadata.
    if args.multi_label and multi_label_data is not None:
        source_args["user_metadata"] = {"multi_label_data": multi_label_data}
    user_overrides = args.json_args['source']
    if user_overrides:
        source_args.update(user_overrides)
    return source_args
def test_source_processing(api, args, resume, name=None, csv_properties=None,
                           session_file=None, path=None, log=None):
    """Creating or retrieving a test data source from input arguments.

    Returns a 4-tuple ``(test_source, resume, csv_properties, fields)``.
    ``resume`` is threaded back so callers know whether a checkpointed
    resource was recovered; ``csv_properties`` accumulates parsing hints
    (missing tokens, data locale) read from the finished source.
    """
    test_source = None
    fields = None
    if csv_properties is None:
        csv_properties = {}
    # A remote test set means the test data must live in a bigml.com source.
    if args.test_set and args.remote:
        # If resuming, try to extract args.source form log files
        if resume:
            message = u.dated("Test source not found. Resuming.\n")
            resume, args.test_source = c.checkpoint(
                c.is_source_created, path, suffix="_test", debug=args.debug,
                message=message, log_file=session_file,
                console=args.verbosity)
        if not resume:
            source_args = r.set_source_args(args, name=name,
                                            data_set_header=args.test_header)
            test_source = r.create_source(args.test_set, source_args, args,
                                          api, path, session_file, log,
                                          source_type="test")
    # If a source is provided either through the command line or in resume
    # steps, we use it.
    elif args.test_source:
        test_source = bigml.api.get_source_id(args.test_source)
    # If we already have source, we check that is finished, extract the
    # fields, and update them if needed.
    if test_source:
        test_source = r.get_source(test_source, api, args.verbosity,
                                   session_file)
        if 'source_parser' in test_source['object']:
            source_parser = test_source['object']['source_parser']
            if 'missing_tokens' in source_parser:
                csv_properties['missing_tokens'] = (
                    source_parser['missing_tokens'])
            if 'locale' in source_parser:
                csv_properties['data_locale'] = source_parser['locale']
                # Drop the user locale when the source already uses it so
                # that no spurious source update is triggered below.
                if (args.user_locale is not None and
                        bigml_locale(args.user_locale) ==
                        source_parser['locale']):
                    args.user_locale = None
        fields = Fields(test_source['object']['fields'], **csv_properties)
        if (args.field_attributes_ or args.types_ or args.user_locale or
                args.json_args.get('source')):
            # avoid updating project_id in source
            project_id, args.project_id = args.project_id, None
            test_source_args = r.set_source_args(args, fields=fields)
            test_source = r.update_source(test_source, test_source_args,
                                          args, api, session_file)
            args.project_id = project_id
            # Re-read fields: the update may have changed types/attributes.
            fields = Fields(test_source['object']['fields'],
                            **csv_properties)
    return test_source, resume, csv_properties, fields
def set_source_args(data_set_header, name, description, args):
    """Return the dict of arguments used to create a source."""
    parser_info = {"header": data_set_header}
    source_args = {
        "name": name,
        "description": description,
        "category": args.category,
        "tags": args.tag,
        "source_parser": parser_info,
    }
    if args.user_locale is None:
        return source_args
    # Map the user's OS locale onto the locale used in bigml.com.
    mapped = bigml_locale(args.user_locale)
    if mapped is None:
        log_message("WARNING: %s locale equivalence not found."
                    " Using %s instead.\n" % (args.user_locale,
                                              LOCALE_DEFAULT),
                    log_file=None, console=True)
        mapped = LOCALE_DEFAULT
    parser_info["locale"] = mapped
    return source_args
def source_processing(api, args, resume, csv_properties=None,
                      multi_label_data=None, session_file=None, path=None,
                      log=None):
    """Creating or retrieving a data source from input arguments.

    Returns a 4-tuple ``(source, resume, csv_properties, fields)``.
    ``csv_properties`` accumulates parsing hints (missing tokens, data
    locale) read back from the finished source.
    """
    source = None
    fields = None
    if csv_properties is None:
        # BUGFIX: the default None was previously used directly as a dict
        # below (csv_properties["missing_tokens"] = ...), raising TypeError.
        # The sibling processing functions already apply this guard.
        csv_properties = {}
    if (args.training_set or (hasattr(args, "evaluate") and args.evaluate and
                              args.test_set)):
        # If resuming, try to extract args.source form log files
        if resume:
            message = u.dated("Source not found. Resuming.\n")
            resume, args.source = c.checkpoint(
                c.is_source_created, path, debug=args.debug, message=message,
                log_file=session_file, console=args.verbosity)
        # If neither a previous source, dataset or model are provided.
        # we create a new one. Also if --evaluate and test data are provided
        # we create a new dataset to test with.
        data_set, data_set_header = r.data_to_source(args)
        if data_set is not None:
            source_args = r.set_source_args(
                args, multi_label_data=multi_label_data,
                data_set_header=data_set_header)
            source = r.create_source(data_set, source_args, args, api, path,
                                     session_file, log)
    # If a source is provided either through the command line or in resume
    # steps, we use it.
    elif args.source:
        source = bigml.api.get_source_id(args.source)
    # If we already have source, we check that is finished, extract the
    # fields, and update them if needed.
    if source:
        source = r.get_source(source, api, args.verbosity, session_file)
        if "source_parser" in source["object"]:
            source_parser = source["object"]["source_parser"]
            if "missing_tokens" in source_parser:
                csv_properties["missing_tokens"] = \
                    source_parser["missing_tokens"]
            if "locale" in source_parser:
                csv_properties["data_locale"] = source_parser["locale"]
                # No changes if user locale is the one in the source.
                if (args.user_locale is not None and
                        bigml_locale(args.user_locale) ==
                        source_parser["locale"]):
                    args.user_locale = None
        fields = Fields(source["object"]["fields"], **csv_properties)
        if (args.field_attributes_ or args.types_ or args.user_locale or
                args.json_args.get("source")):
            source_args = r.set_source_args(args, fields=fields)
            source = r.update_source(source, source_args, args, api,
                                     session_file)
            # Re-read fields after the update.
            fields = Fields(source["object"]["fields"], **csv_properties)
    return source, resume, csv_properties, fields
def set_source_args(data_set_header, name, description, args,
                    multi_label_data=None):
    """Returns a source arguments dict.

    :param data_set_header: whether the uploaded data file has a header row
    :param name: name for the new source
    :param description: description for the new source
    :param args: parsed command-line options namespace (reads .category,
        .tag, .user_locale, .training_separator, .multi_label, .json_args)
    :param multi_label_data: multi-label fields info stored as user metadata
    """
    source_args = {
        "name": name,
        "description": description,
        "category": args.category,
        "tags": args.tag,
        "source_parser": {"header": data_set_header}}
    # If user has given an OS locale, try to add the locale used in bigml.com
    if args.user_locale is not None:
        source_locale = bigml_locale(args.user_locale)
        if source_locale is None:
            log_message("WARNING: %s locale equivalence not found."
                        " Using %s instead.\n" % (args.user_locale,
                                                  LOCALE_DEFAULT),
                        log_file=None, console=True)
            source_locale = LOCALE_DEFAULT
        source_args["source_parser"].update({'locale': source_locale})
    # If user has set a training separator, use it.
    if args.training_separator is not None:
        # NOTE(review): str.decode("string_escape") is a Python 2-only
        # codec — confirm before porting this module to Python 3.
        training_separator = args.training_separator.decode("string_escape")
        source_args["source_parser"].update({'separator': training_separator})
    # If uploading a multi-label file, add the user_metadata info needed to
    # manage the multi-label fields
    if args.multi_label and multi_label_data is not None:
        source_args.update(
            {"user_metadata": {"multi_label_data": multi_label_data}})
    # BUGFIX: use .get() so a missing 'source' key in json_args no longer
    # raises KeyError; this matches the .get('source') checks used by the
    # processing functions in this module.
    if args.json_args.get('source'):
        source_args.update(args.json_args['source'])
    return source_args
def set_source_args(data_set_header, name, description, args):
    """Compose the arguments dict for creating a source."""
    locale_to_use = None
    if args.user_locale is not None:
        # Translate the OS locale into the one bigml.com understands,
        # warning and falling back to the default when none matches.
        locale_to_use = bigml_locale(args.user_locale)
        if locale_to_use is None:
            log_message("WARNING: %s locale equivalence not found."
                        " Using %s instead.\n" % (args.user_locale,
                                                  LOCALE_DEFAULT),
                        log_file=None, console=True)
            locale_to_use = LOCALE_DEFAULT
    source_parser = {"header": data_set_header}
    if locale_to_use is not None:
        source_parser['locale'] = locale_to_use
    return {"name": name,
            "description": description,
            "category": args.category,
            "tags": args.tag,
            "source_parser": source_parser}
def test_source_processing(api, args, resume, name=None, csv_properties=None,
                           session_file=None, path=None, log=None):
    """Creating or retrieving a test data source from input arguments.

    Returns a 4-tuple ``(test_source, resume, csv_properties, fields)``.
    ``csv_properties`` accumulates parsing hints (missing tokens, data
    locale) read back from the finished source.
    """
    test_source = None
    fields = None
    if csv_properties is None:
        csv_properties = {}
    if args.test_set and args.remote:
        # If resuming, try to extract args.source form log files
        if resume:
            message = u.dated("Test source not found. Resuming.\n")
            resume, args.test_source = c.checkpoint(
                c.is_source_created, path, suffix="_test", debug=args.debug,
                message=message, log_file=session_file,
                console=args.verbosity)
        if not resume:
            source_args = r.set_source_args(args, name=name,
                                            data_set_header=args.test_header)
            test_source = r.create_source(args.test_set, source_args, args,
                                          api, path, session_file, log,
                                          source_type="test")
    # If a source is provided either through the command line or in resume
    # steps, we use it.
    elif args.test_source:
        test_source = bigml.api.get_source_id(args.test_source)
    # If we already have source, we check that is finished, extract the
    # fields, and update them if needed.
    if test_source:
        test_source = r.get_source(test_source, api, args.verbosity,
                                   session_file)
        if 'source_parser' in test_source['object']:
            source_parser = test_source['object']['source_parser']
            if 'missing_tokens' in source_parser:
                csv_properties['missing_tokens'] = (
                    source_parser['missing_tokens'])
            if 'locale' in source_parser:
                csv_properties['data_locale'] = source_parser['locale']
                if (args.user_locale is not None and
                        bigml_locale(args.user_locale) ==
                        source_parser['locale']):
                    args.user_locale = None
        fields = Fields(test_source['object']['fields'], **csv_properties)
        if (args.field_attributes_ or args.types_ or args.user_locale or
                args.json_args.get('source')):
            # avoid updating project_id in source
            project_id, args.project_id = args.project_id, None
            test_source_args = r.set_source_args(args, fields=fields)
            # BUGFIX: previously passed ``source_args`` here — a name only
            # bound when the creation branch above ran, so resumed/CLI
            # sources crashed with NameError (and would have sent the wrong
            # update payload otherwise). The update args are
            # ``test_source_args``.
            test_source = r.update_source(test_source, test_source_args,
                                          args, api, session_file)
            args.project_id = project_id
            # BUGFIX: previously read ``source['object']`` — ``source`` is
            # not defined in this function; the refreshed resource is
            # ``test_source``.
            fields = Fields(test_source['object']['fields'],
                            **csv_properties)
    return test_source, resume, csv_properties, fields
def source_processing(api, args, resume, csv_properties=None,
                      multi_label_data=None, session_file=None, path=None,
                      log=None):
    """Creating or retrieving a data source from input arguments.

    Returns a 4-tuple ``(source, resume, csv_properties, fields)``.
    Also resolves the project the source belongs to via
    ``pp.project_processing`` before creation.
    """
    source = None
    fields = None
    if csv_properties is None:
        # BUGFIX: the default None was previously used directly as a dict
        # below (csv_properties['missing_tokens'] = ...), raising TypeError.
        # The test-source variant of this function already applies the guard.
        csv_properties = {}
    if (args.training_set or (hasattr(args, "evaluate") and args.evaluate and
                              args.test_set)):
        # If resuming, try to extract args.source form log files
        if resume:
            message = u.dated("Source not found. Resuming.\n")
            resume, args.source = c.checkpoint(
                c.is_source_created, path, debug=args.debug, message=message,
                log_file=session_file, console=args.verbosity)
        # If neither a previous source, dataset or model are provided.
        # we create a new one. Also if --evaluate and test data are provided
        # we create a new dataset to test with.
        data_set, data_set_header = r.data_to_source(args)
        if data_set is not None:
            # Check if there's a created project for it
            args.project_id = pp.project_processing(
                api, args, resume, session_file=session_file, path=path,
                log=log)
            source_args = r.set_source_args(
                args, multi_label_data=multi_label_data,
                data_set_header=data_set_header)
            source = r.create_source(data_set, source_args, args, api, path,
                                     session_file, log)
    # If a source is provided either through the command line or in resume
    # steps, we use it.
    elif args.source:
        source = bigml.api.get_source_id(args.source)
    # If we already have source, we check that is finished, extract the
    # fields, and update them if needed.
    if source:
        source = r.get_source(source, api, args.verbosity, session_file)
        if 'source_parser' in source['object']:
            source_parser = source['object']['source_parser']
            if 'missing_tokens' in source_parser:
                csv_properties['missing_tokens'] = (
                    source_parser['missing_tokens'])
            if 'locale' in source_parser:
                csv_properties['data_locale'] = source_parser['locale']
                # No changes if user locale is the one in the source.
                if (args.user_locale is not None and
                        bigml_locale(args.user_locale) ==
                        source_parser['locale']):
                    args.user_locale = None
        fields = Fields(source['object']['fields'], **csv_properties)
        if (args.field_attributes_ or args.types_ or args.user_locale or
                args.json_args.get('source')):
            # avoid updating project_id in source
            project_id, args.project_id = args.project_id, None
            source_args = r.set_source_args(args, fields=fields)
            source = r.update_source(source, source_args, args, api,
                                     session_file)
            args.project_id = project_id
            # Re-read fields after the update.
            fields = Fields(source['object']['fields'], **csv_properties)
    return source, resume, csv_properties, fields
def set_source_args(args, name=None, multi_label_data=None,
                    data_set_header=None, fields=None):
    """Returns a source arguments dict.

    :param args: parsed command-line options namespace
    :param name: source name (defaults to ``args.name``)
    :param multi_label_data: multi-label fields info stored as user metadata
    :param data_set_header: whether the uploaded file has a header row
    :param fields: existing ``Fields`` structure, required to translate
        per-column attribute/type updates (update time only)
    """
    if name is None:
        name = args.name
    source_args = set_basic_args(args, name)
    if args.project_id is not None:
        source_args.update({"project": args.project_id})
    # if header is set, use it
    if data_set_header is not None:
        source_args.update({"source_parser": {"header": data_set_header}})
    # If user has given an OS locale, try to add the locale used in bigml.com
    if args.user_locale is not None:
        source_locale = bigml_locale(args.user_locale)
        if source_locale is None:
            log_message("WARNING: %s locale equivalence not found."
                        " Using %s instead.\n" % (args.user_locale,
                                                  LOCALE_DEFAULT),
                        log_file=None, console=True)
            source_locale = LOCALE_DEFAULT
        # BUGFIX: this used to do source_args.update({'source_parser': {}}),
        # unconditionally replacing the parser dict and discarding a header
        # set just above; only create the dict when it is missing.
        source_args.setdefault("source_parser", {})
        source_args["source_parser"].update({'locale': source_locale})
    # If user has set a training separator, use it.
    if args.training_separator is not None:
        training_separator = decode2(args.training_separator,
                                     encoding="string_escape")
        # BUGFIX: guard against KeyError when neither the header nor the
        # locale branch created the "source_parser" dict.
        source_args.setdefault("source_parser", {})
        source_args["source_parser"].update({'separator': training_separator})
    # If uploading a multi-label file, add the user_metadata info needed to
    # manage the multi-label fields
    if (hasattr(args, 'multi_label') and args.multi_label and
            multi_label_data is not None):
        source_args.update(
            {"user_metadata": {"multi_label_data": multi_label_data}})
    # to update fields attributes or types you must have a previous fields
    # structure (at update time)
    if fields:
        if args.field_attributes_:
            update_attributes(source_args,
                              {"fields": args.field_attributes_},
                              by_column=True, fields=fields)
        if args.types_:
            update_attributes(source_args,
                              {"fields": args.types_},
                              by_column=True, fields=fields)
        if args.import_fields:
            fields_struct = fields.new_fields_structure(args.import_fields)
            check_fields_struct(fields_struct, "source")
            update_attributes(source_args, fields_struct)
        if 'source' in args.json_args:
            update_json_args(source_args, args.json_args.get('source'),
                             fields)
    return source_args