def __init__(self, fields, missing_tokens=None, data_locale=DEFAULT_LOCALE,
             verbose=False, objective_field=None,
             objective_field_present=False, include=None):
    """Store a fields structure and build its lookup tables.

    Args:
        fields: Fields structure. It is kept in ``self.fields`` and
            indexed through ``invert_dictionary`` by its ``name`` and
            ``column_number`` attributes.
        missing_tokens: Value stored in ``self.missing_tokens``. When
            omitted (or ``None``) a new ``['']`` list is used.
        data_locale: Locale handed to ``find_locale``.
        verbose: Flag handed to ``find_locale``.
        objective_field: Forwarded to ``update_objective_field``.
        objective_field_present: Forwarded to ``update_objective_field``.
        include: Ids of the fields to be included. When ``None`` all the
            keys of ``fields`` are used.
    """
    if missing_tokens is None:
        # Build the default per call: a list literal in the signature is
        # created once and would be shared by every instance storing it.
        missing_tokens = ['']

    find_locale(data_locale, verbose)

    self.fields = fields
    self.fields_by_name = invert_dictionary(fields, 'name')
    self.fields_by_column_number = invert_dictionary(
        fields, 'column_number')
    self.missing_tokens = missing_tokens
    self.fields_columns = sorted(self.fields_by_column_number.keys())

    # Ids of the fields to be included
    self.filtered_fields = (self.fields.keys() if include is None
                            else include)

    # To be updated in update_objective_field
    self.row_ids = None
    self.headers = None
    self.objective_field = None
    self.objective_field_present = None
    self.filtered_indexes = None
    self.update_objective_field(objective_field, objective_field_present)
def __init__(
    self,
    fields,
    missing_tokens=None,
    data_locale=DEFAULT_LOCALE,
    verbose=False,
    objective_field=None,
    objective_field_present=False,
    include=None,
):
    """Store a fields structure and build its lookup tables.

    Args:
        fields: Fields structure. It is kept in ``self.fields`` and
            indexed through ``invert_dictionary`` by its ``name`` and
            ``column_number`` attributes.
        missing_tokens: Value stored in ``self.missing_tokens``. When
            omitted (or ``None``) a new ``[""]`` list is used.
        data_locale: Locale handed to ``find_locale``.
        verbose: Flag handed to ``find_locale``.
        objective_field: Forwarded to ``update_objective_field``.
        objective_field_present: Forwarded to ``update_objective_field``.
        include: Ids of the fields to be included. When ``None`` all the
            keys of ``fields`` are used.
    """
    if missing_tokens is None:
        # A list literal used as a default is evaluated once at definition
        # time, so every instance would end up holding the same object.
        missing_tokens = [""]

    find_locale(data_locale, verbose)

    self.fields = fields
    self.fields_by_name = invert_dictionary(fields, "name")
    self.fields_by_column_number = invert_dictionary(fields, "column_number")
    self.missing_tokens = missing_tokens
    self.fields_columns = sorted(self.fields_by_column_number.keys())

    # Ids of the fields to be included
    self.filtered_fields = self.fields.keys() if include is None else include

    # To be updated in update_objective_field
    self.row_ids = None
    self.headers = None
    self.objective_field = None
    self.objective_field_present = None
    self.filtered_indexes = None
    self.update_objective_field(objective_field, objective_field_present)
def __init__(self, resource_or_fields, missing_tokens=None,
             data_locale=None, verbose=False,
             objective_field=None, objective_field_present=False,
             include=None, errors=None):
    """Build the fields lookup tables from a resource or a fields structure.

    ``get_fields_structure`` is tried first. If it raises ``ValueError``
    the argument is assumed to be a fields structure already.

    Args:
        resource_or_fields: Resource or fields structure.
        missing_tokens: Value stored in ``self.missing_tokens``. When
            ``None`` the resource's own tokens are used if it has any,
            otherwise ``DEFAULT_MISSING_TOKENS``.
        data_locale: Locale handed to ``find_locale``. When ``None`` the
            resource locale is used, or ``DEFAULT_LOCALE`` when no resource
            structure was matched.
        verbose: Flag handed to ``find_locale``.
        objective_field: Forwarded to ``update_objective_field``. When
            ``None`` the objective column extracted from the resource (if
            any) is used instead.
        objective_field_present: Forwarded to ``update_objective_field``.
            Forced to ``True`` when the objective comes from the resource.
        include: Ids of the fields to be included. When ``None`` all the
            keys of the fields structure are used.
        errors: Value stored in ``self.field_errors``. When ``None`` and a
            resource was matched, the resource errors are used.

    Raises:
        ValueError: If no fields structure could be obtained.
    """
    # The constructor can be instantiated with resources or a fields
    # structure. The structure is checked and fields structure is returned
    # if a resource type is matched.
    try:
        resource_info = get_fields_structure(resource_or_fields, True)
        (self.fields,
         resource_locale,
         resource_missing_tokens,
         objective_column,
         resource_errors) = resource_info
        if data_locale is None:
            data_locale = resource_locale
        if missing_tokens is None:
            # Fall back to the defaults when the resource carries no
            # missing tokens, as the fields-structure branch below does,
            # so self.missing_tokens is not left as None.
            missing_tokens = (resource_missing_tokens or
                              DEFAULT_MISSING_TOKENS)
        if errors is None:
            errors = resource_errors
    except ValueError:
        # If the resource structure is not in the expected set, fields
        # structure is assumed
        self.fields = resource_or_fields
        if data_locale is None:
            data_locale = DEFAULT_LOCALE
        if missing_tokens is None:
            missing_tokens = DEFAULT_MISSING_TOKENS
        objective_column = None

    if self.fields is None:
        raise ValueError("No fields structure was found.")

    self.fields_by_name = invert_dictionary(self.fields, 'name')
    self.fields_by_column_number = invert_dictionary(
        self.fields, 'column_number')
    find_locale(data_locale, verbose)
    self.missing_tokens = missing_tokens
    self.fields_columns = sorted(self.fields_by_column_number.keys())

    # Ids of the fields to be included
    self.filtered_fields = (self.fields.keys() if include is None
                            else include)

    # To be updated in update_objective_field
    self.row_ids = None
    self.headers = None
    self.objective_field = None
    self.objective_field_present = None
    self.filtered_indexes = None
    self.field_errors = errors

    # if the objective field is not set by the user
    # use the one extracted from the resource info
    if objective_field is None and objective_column is not None:
        objective_field = objective_column
        objective_field_present = True
    self.update_objective_field(objective_field, objective_field_present)
def __init__(self, fields, missing_tokens=None,
             data_locale=DEFAULT_LOCALE, verbose=False):
    """Store a fields structure and index it by name and column number.

    Args:
        fields: Fields structure. It is kept in ``self.fields`` and
            indexed through ``invert_dictionary`` by its ``name`` and
            ``column_number`` attributes.
        missing_tokens: Value stored in ``self.missing_tokens``. When
            omitted (or ``None``) a new ``['']`` list is used.
        data_locale: Locale handed to ``find_locale``.
        verbose: Flag handed to ``find_locale``.
    """
    if missing_tokens is None:
        # Created here rather than in the signature so that instances do
        # not share (and accidentally mutate) one default list.
        missing_tokens = ['']

    find_locale(data_locale, verbose)

    self.fields = fields
    self.fields_by_name = invert_dictionary(fields, 'name')
    self.fields_by_column_number = invert_dictionary(fields, 'column_number')
    self.missing_tokens = missing_tokens
def __init__(self, resource_or_fields, missing_tokens=None,
             data_locale=None, verbose=False,
             objective_field=None, objective_field_present=False,
             include=None, errors=None):
    """Build the fields lookup tables from a resource or a fields structure.

    The argument is first passed to ``get_fields_structure``. A
    ``ValueError`` from that call means the argument is treated as a fields
    structure itself.

    Args:
        resource_or_fields: Resource or fields structure.
        missing_tokens: Value stored in ``self.missing_tokens``. When
            ``None`` the tokens found in the resource are used if there are
            any, otherwise ``DEFAULT_MISSING_TOKENS``.
        data_locale: Locale handed to ``find_locale``. When ``None`` the
            resource locale is used, or ``DEFAULT_LOCALE`` when no resource
            structure was matched.
        verbose: Flag handed to ``find_locale``.
        objective_field: Forwarded to ``update_objective_field``. When
            ``None`` the objective column extracted from the resource (if
            any) takes its place.
        objective_field_present: Forwarded to ``update_objective_field``.
            Set to ``True`` when the objective comes from the resource.
        include: Ids of the fields to be included. When ``None`` all the
            keys of the fields structure are used.
        errors: Value stored in ``self.field_errors``. When ``None`` and a
            resource was matched, the resource errors are used.

    Raises:
        ValueError: If no fields structure could be obtained.
    """
    # The constructor can be instantiated with resources or a fields
    # structure. The structure is checked and fields structure is returned
    # if a resource type is matched.
    try:
        resource_info = get_fields_structure(resource_or_fields, True)
        (self.fields,
         resource_locale,
         resource_missing_tokens,
         objective_column,
         resource_errors) = resource_info
        if data_locale is None:
            data_locale = resource_locale
        if missing_tokens is None:
            # A resource without missing tokens used to leave this as
            # None; use the same default as the fields-structure branch.
            if resource_missing_tokens:
                missing_tokens = resource_missing_tokens
            else:
                missing_tokens = DEFAULT_MISSING_TOKENS
        if errors is None:
            errors = resource_errors
    except ValueError:
        # If the resource structure is not in the expected set, fields
        # structure is assumed
        self.fields = resource_or_fields
        if data_locale is None:
            data_locale = DEFAULT_LOCALE
        if missing_tokens is None:
            missing_tokens = DEFAULT_MISSING_TOKENS
        objective_column = None

    if self.fields is None:
        raise ValueError("No fields structure was found.")

    self.fields_by_name = invert_dictionary(self.fields, 'name')
    self.fields_by_column_number = invert_dictionary(self.fields,
                                                     'column_number')
    find_locale(data_locale, verbose)
    self.missing_tokens = missing_tokens
    self.fields_columns = sorted(self.fields_by_column_number.keys())

    # Ids of the fields to be included
    self.filtered_fields = (self.fields.keys() if include is None
                            else include)

    # To be updated in update_objective_field
    self.row_ids = None
    self.headers = None
    self.objective_field = None
    self.objective_field_present = None
    self.filtered_indexes = None
    self.field_errors = errors

    # if the objective field is not set by the user
    # use the one extracted from the resource info
    if objective_field is None and objective_column is not None:
        objective_field = objective_column
        objective_field_present = True
    self.update_objective_field(objective_field, objective_field_present)
def to_prediction(self, value_as_string, data_locale=DEFAULT_LOCALE):
    """Given a prediction string, returns its value in the required type

    For a numeric objective field the locale is set through
    ``find_locale`` and the value is cast with the ``PYTHON_FUNC`` entry
    for the field's datatype. In every other case the value is returned as
    a unicode string.

    Args:
        value_as_string: Prediction value, as a byte string encoded in
            UTF-8 or as a unicode string.
        data_locale: Locale handed to ``find_locale``. When ``None``,
            ``self.locale`` is used.
    """
    def as_unicode(value):
        # unicode(x, "utf-8") raises TypeError when x is already unicode,
        # so only byte strings are decoded.
        if isinstance(value, unicode):
            return value
        return unicode(value, "utf-8")

    objective_field = self.tree.objective_field
    if self.tree.fields[objective_field]['optype'] == 'numeric':
        if data_locale is None:
            data_locale = self.locale
        find_locale(data_locale)
        datatype = self.tree.fields[objective_field]['datatype']
        # Datatypes without a registered cast are returned as unicode.
        cast_function = PYTHON_FUNC.get(datatype, as_unicode)
        return cast_function(value_as_string)
    return as_unicode(value_as_string)
def to_prediction(self, value_as_string, data_locale=DEFAULT_LOCALE):
    """Convert a prediction string to the type of the objective field.

    The value is first decoded to unicode (as UTF-8) unless it already is
    unicode. For a numeric objective field the locale is set through
    ``find_locale`` and the ``PYTHON_FUNC`` entry for the field's datatype,
    if there is one, is applied. Otherwise the unicode string is returned.

    Args:
        value_as_string: Prediction value as a byte or unicode string.
        data_locale: Locale handed to ``find_locale``. When ``None``,
            ``self.locale`` is used.
    """
    text = value_as_string
    if not isinstance(text, unicode):
        text = unicode(text, "utf-8")

    objective = self.fields[self.tree.objective_id]
    if objective['optype'] == 'numeric':
        # self.locale is only read when no locale was given
        find_locale(self.locale if data_locale is None else data_locale)
        caster = PYTHON_FUNC.get(objective['datatype'])
        if caster is not None:
            return caster(text)
    return text
def to_prediction(model, value_as_string, data_locale=DEFAULT_LOCALE):
    """Convert a prediction string to the type of the model's objective.

    Values that are not ``str`` are decoded as UTF-8 first. When the
    objective field's optype is ``NUMERIC`` the locale is set through
    ``find_locale`` and the ``PYTHON_FUNC`` entry for the field's datatype,
    if there is one, is applied. Otherwise the string is returned.

    Args:
        model: Object exposing ``objective_id``, ``fields`` and ``locale``.
        value_as_string: Prediction value as ``str`` or UTF-8 bytes.
        data_locale: Locale handed to ``find_locale``. When ``None``,
            ``model.locale`` is used.
    """
    text = value_as_string
    if not isinstance(text, str):
        text = str(text, "utf-8")

    objective = model.fields[model.objective_id]
    if objective['optype'] != NUMERIC:
        return text

    # model.locale is only read when no locale was given
    find_locale(model.locale if data_locale is None else data_locale)
    caster = PYTHON_FUNC.get(objective['datatype'])
    return text if caster is None else caster(text)