def svr_xml_converter(raw_data):
    '''
    This method converts the supplied xml file-object to a python
    dictionary.

    @raw_data, generally a file (or json string) containing the raw
        dataset(s), to be used when computing a corresponding model. If
        this argument is a file, it needs to be closed.

    @list_observation_label, is a list containing dependent variable
        labels.
    '''

    feature_count = None
    list_dataset = []
    list_observation_label = []
    # 'criterion' is expected before 'predictor' in each observation;
    # initialized here so a malformed ordering fails loudly, not with
    # a stale label from a previous observation
    observation_label = None
    logger = Logger(__name__, 'error', 'error')

    try:
        # convert xml file to python 'dict'
        dataset = xmltodict.parse(raw_data)

        # build 'list_dataset'
        for observation in dataset['dataset']['observation']:
            for key in observation:
                if key == 'criterion':
                    observation_label = observation['criterion']
                    list_observation_label.append(observation[key])

                elif key == 'predictor':
                    for predictor in observation[key]:
                        predictor_label = predictor['label']
                        predictor_value = predictor['value']

                        # validate predictor value; abort on first error
                        validate_value = Validate_Dataset(predictor_value)
                        validate_value.validate_value()
                        list_error_value = validate_value.get_errors()
                        if list_error_value:
                            logger.log(list_error_value)
                            return None
                        else:
                            list_dataset.append({
                                'dep_variable_label': str(observation_label),
                                'indep_variable_label': str(predictor_label),
                                'indep_variable_value': predictor_value
                            })

            # generalized feature count in an observation
            # bugfix: compare against None so a legitimate count of 0 is
            # not recomputed on every observation
            if feature_count is None:
                feature_count = len(observation['predictor'])
    finally:
        # bugfix: the original leaked the file handle on the early error
        # return; close on every exit path (guarded, since raw_data may
        # be a json string per the docstring)
        if hasattr(raw_data, 'close'):
            raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
def svr_xml_converter(raw_data):
    """
    This method converts the supplied xml file-object to a python dictionary.

    @raw_data, generally a file (or json string) containing the raw
        dataset(s), to be used when computing a corresponding model. If this
        argument is a file, it needs to be closed.

    @list_observation_label, is a list containing dependent variable labels.
    """
    feature_count = None
    list_dataset = []
    list_observation_label = []
    # 'criterion' is expected before 'predictor' within each observation;
    # initialized so a malformed ordering fails loudly instead of reusing a
    # stale label from a previous observation
    observation_label = None
    logger = Logger(__name__, "error", "error")

    try:
        # convert xml file to python 'dict'
        dataset = xmltodict.parse(raw_data)

        # build 'list_dataset'
        for observation in dataset["dataset"]["observation"]:
            for key in observation:
                if key == "criterion":
                    observation_label = observation["criterion"]
                    list_observation_label.append(observation[key])

                elif key == "predictor":
                    for predictor in observation[key]:
                        predictor_label = predictor["label"]
                        predictor_value = predictor["value"]

                        # validate predictor value; abort on first error
                        validate_value = Validate_Dataset(predictor_value)
                        validate_value.validate_value()
                        list_error_value = validate_value.get_errors()
                        if list_error_value:
                            logger.log(list_error_value)
                            return None

                        list_dataset.append(
                            {
                                "dep_variable_label": str(observation_label),
                                "indep_variable_label": str(predictor_label),
                                "indep_variable_value": predictor_value,
                            }
                        )

            # generalized feature count in an observation
            # bugfix: compare against None so a legitimate count of 0 is not
            # recomputed on every observation
            if feature_count is None:
                feature_count = len(observation["predictor"])
    finally:
        # bugfix: the original leaked the file handle on the early error
        # return; close on every exit path (guarded, since raw_data may be a
        # json string per the docstring)
        if hasattr(raw_data, "close"):
            raw_data.close()

    # save observation labels, and return
    return {
        "dataset": list_dataset,
        "observation_labels": list_observation_label,
        "feature_count": feature_count,
    }
def xml_to_dict(self):
    '''
    Convert the supplied xml file-object ('self.svm_file') to a list of
    dictionaries, one per (dependent, independent) variable pairing.

    Side effects: sets 'self.count_features' and 'self.observation_labels',
    and closes 'self.svm_file'. Returns None on a validation failure.
    '''

    list_dataset = []
    observation_label = []

    # convert xml file to python 'dict'
    dataset = xmltodict.parse(self.svm_file)

    # build 'list_dataset'
    for dep_variable in dataset['dataset']['observation']:
        dep_variable_label = dep_variable['dependent-variable']

        # validate the dependent variable label
        validate = Validate_Dataset(dep_variable_label)
        validate.validate_label()
        list_error = validate.get_errors()
        if list_error:
            print(list_error)
            return None
        else:
            observation_label.append(dep_variable_label)

        for indep_variable in dep_variable['independent-variable']:
            indep_variable_label = indep_variable['label']
            indep_variable_value = indep_variable['value']

            validate_label = Validate_Dataset(indep_variable_label)
            validate_value = Validate_Dataset(indep_variable_value)
            validate_label.validate_label()
            validate_value.validate_value()

            # bugfix: read errors from the validators just executed, not
            # from the already-checked dependent-variable validator
            list_error_label = validate_label.get_errors()
            list_error_value = validate_value.get_errors()
            if list_error_label or list_error_value:
                print(list_error_label)
                print(list_error_value)
                return None
            else:
                list_dataset.append({
                    'dep_variable_label': dep_variable_label,
                    'indep_variable_label': indep_variable_label,
                    'indep_variable_value': indep_variable_value
                })

        # generalized feature count in an observation
        if not self.count_features:
            self.count_features = len(dep_variable['independent-variable'])

    # close file, save observation labels, and return
    self.svm_file.close()
    self.observation_labels = observation_label
    return list_dataset
def svm_csv_converter(raw_data):
    '''@svm_csv_converter

    This method converts the supplied csv file-object, intended for an
    svm model, to a python dictionary.

    @raw_data, generally a file (or json string) containing the raw
        dataset(s), to be used when computing a corresponding model. If
        this argument is a file, it needs to be closed.

    @list_observation_label, is a list containing dependent variable
        labels.

    Note: we use the 'Universal Newline Support' with the 'U' parameter
          when opening 'raw_data'. This allows newlines to be understood
          regardless, if the newline character was created in osx,
          windows, or linux.

    Note: since 'row' is a list, with one comma-delimited string element,
          the following line is required in this method:

          row = row[0].split(',')
    '''

    feature_count = None
    list_dataset = []
    list_observation_label = []
    list_feature_label = []
    logger = Logger(__name__, 'error', 'error')

    try:
        # open temporary 'csvfile' reader object
        dataset_reader = csv.reader(raw_data, delimiter=' ', quotechar='|')

        # iterate first row of csvfile: header of feature labels
        for row in islice(dataset_reader, 0, 1):

            # iterate each column in a given row (skip first column)
            row_indep_label = row[0].split(',')
            for value in islice(row_indep_label, 1, None):
                validate = Validate_Dataset(value)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    logger.log(list_error)
                    return None
                else:
                    list_feature_label.append(value)

        # iterate all remaining rows of csvfile
        for dep_index, row in enumerate(islice(dataset_reader, 0, None)):

            # iterate first column of each row: observation label
            row_dep_label = row[0].split(',')
            for value in row_dep_label[:1]:
                validate = Validate_Dataset(value)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    logger.log(list_error)
                    return None
                else:
                    list_observation_label.append(value)

            # generalized feature count in an observation
            # bugfix: compare against None so a legitimate count of 0 is
            # not recomputed on every row
            row_indep_variable = row[0].split(',')
            if feature_count is None:
                feature_count = len(row_indep_variable) - 1

            # iterate each remaining column in a given row: feature values
            for indep_index, value in enumerate(
                islice(row_indep_variable, 1, None)
            ):
                try:
                    validate = Validate_Dataset(value)
                    validate.validate_value()

                    list_error = validate.get_errors()
                    if list_error:
                        logger.log(list_error)
                        return None
                    else:
                        value = float(value)
                except Exception as error:
                    logger.log(error)
                    # NOTE(review): 'False' differs from the 'None' used on
                    # the other error paths; preserved so existing callers
                    # that distinguish the two keep working
                    return False

                list_dataset.append({
                    'dep_variable_label': list_observation_label[dep_index],
                    'indep_variable_label': list_feature_label[indep_index],
                    'indep_variable_value': value
                })
    finally:
        # bugfix: the original leaked the file handle on the early error
        # returns; close on every exit path (guarded, since raw_data may
        # be a string per the docstring)
        if hasattr(raw_data, 'close'):
            raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
def svm_xml_converter(raw_data):
    '''@svm_xml_converter

    This method converts the supplied xml file-object to a python
    dictionary.

    @raw_data, generally a file (or json string) containing the raw
        dataset(s), to be used when computing a corresponding model. If
        this argument is a file, it needs to be closed.

    @list_observation_label, is a list containing dependent variable
        labels.
    '''

    feature_count = None
    list_dataset = []
    list_observation_label = []
    logger = Logger(__name__, 'error', 'error')

    try:
        # convert xml file to python 'dict'
        dataset = xmltodict.parse(raw_data)

        # build 'list_dataset'
        for observation in dataset['dataset']['observation']:
            observation_label = observation['dependent-variable']

            # validate the dependent variable label
            validate = Validate_Dataset(observation_label)
            validate.validate_label()
            list_error = validate.get_errors()
            if list_error:
                logger.log(list_error)
                return None
            else:
                list_observation_label.append(observation_label)

            for feature in observation['independent-variable']:
                feature_label = feature['label']
                feature_value = feature['value']

                validate_label = Validate_Dataset(feature_label)
                validate_value = Validate_Dataset(feature_value)
                validate_label.validate_label()
                validate_value.validate_value()

                # bugfix: read errors from 'validate_label' and
                # 'validate_value', not from the already-checked
                # observation-label validator
                list_error_label = validate_label.get_errors()
                list_error_value = validate_value.get_errors()
                if list_error_label or list_error_value:
                    logger.log(list_error_label)
                    logger.log(list_error_value)
                    return None
                else:
                    list_dataset.append({
                        'dep_variable_label': observation_label,
                        'indep_variable_label': feature_label,
                        'indep_variable_value': feature_value
                    })

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(observation['independent-variable'])
    finally:
        # bugfix: the original leaked the file handle on the early error
        # returns; close on every exit path (guarded, since raw_data may
        # be a string per the docstring)
        if hasattr(raw_data, 'close'):
            raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
def svm_json_converter(raw_data, is_json):
    '''@svm_json_converter

    This method converts the supplied json file-object to a python
    dictionary.

    @raw_data, generally a file (or json string) containing the raw
        dataset(s), to be used when computing a corresponding model. If
        this argument is a file, it needs to be closed.

    @is_json, flag indicating 'raw_data' is a json string.

    @observation_labels, is a list containing dependent variable labels.
    '''

    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    if is_json:
        dataset = raw_data
    else:
        dataset = json.load(raw_data)

    def _has_errors(validator):
        # log and report any validation errors accumulated by 'validator'
        errors = validator.get_errors()
        if errors:
            logger.log(errors)
            return True
        return False

    try:
        for observation_label in dataset:
            # variables
            observations = dataset[observation_label]

            # validation: dependent variable label
            # bugfix: check errors per item; the original checked only the
            # validators left over from the final loop iteration (letting
            # earlier invalid entries through, and raising NameError on an
            # empty dataset)
            validate_olabel = Validate_Dataset(observation_label)
            validate_olabel.validate_label()
            if _has_errors(validate_olabel):
                return None

            # dependent variable with multiple observations
            if type(observations) == list:
                for observation in observations:
                    for feature_label, feature_value in observation.items():
                        # validation: feature label and value
                        validate_flabel = Validate_Dataset(feature_label)
                        validate_flabel.validate_label()
                        validate_fvalue = Validate_Dataset(feature_value)
                        validate_fvalue.validate_value()
                        if _has_errors(validate_flabel):
                            return None
                        if _has_errors(validate_fvalue):
                            return None

                        # restructured data
                        list_dataset.append({
                            'dep_variable_label': observation_label,
                            'indep_variable_label': feature_label,
                            'indep_variable_value': feature_value
                        })

                    # generalized feature count in an observation
                    if not feature_count:
                        feature_count = len(observation)

            # dependent variable with single observation
            elif type(observations) == dict:
                for feature_label, feature_value in observations.items():
                    # validation: feature label and value
                    validate_flabel = Validate_Dataset(feature_label)
                    validate_flabel.validate_label()
                    validate_fvalue = Validate_Dataset(feature_value)
                    validate_fvalue.validate_value()
                    if _has_errors(validate_flabel):
                        return None
                    if _has_errors(validate_fvalue):
                        return None

                    # restructured data
                    list_dataset.append({
                        'dep_variable_label': observation_label,
                        'indep_variable_label': feature_label,
                        'indep_variable_value': feature_value
                    })

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(observations)

            # list of observation label
            observation_labels.append(observation_label)
    finally:
        # bugfix: close the file on error paths too, not only on success
        if not is_json:
            raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
def csv_to_dict(self):
    """@csv_to_dict

    This method converts the supplied csv file-object to a python
    dictionary.

    @list_observation_label, is a list containing dependent variable
        labels.

    Note: we use the 'Universal Newline Support' with the 'U' parameter
          when opening 'self.svm_data'. This allows newlines to be
          understood regardless, if the newline character was created in
          osx, windows, or linux.

    Note: since 'row' is a list, with one comma-delimited string element,
          the following line is required in this method:

          row = row[0].split(',')
    """

    list_dataset = []
    list_observation_label = []
    list_feature_label = []

    try:
        # open temporary 'csvfile' reader object
        dataset_reader = csv.reader(
            self.svm_data, delimiter=' ', quotechar='|'
        )

        # iterate first row of csvfile: header of feature labels
        for row in islice(dataset_reader, 0, 1):

            # iterate each column in a given row (skip first column)
            row_indep_label = row[0].split(',')
            for value in islice(row_indep_label, 1, None):
                validate = Validate_Dataset(value)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    print(list_error)
                    return None
                else:
                    list_feature_label.append(value)

        # iterate all remaining rows of csvfile
        for dep_index, row in enumerate(islice(dataset_reader, 0, None)):

            # iterate first column of each row: observation label
            row_dep_label = row[0].split(',')
            for value in row_dep_label[:1]:
                validate = Validate_Dataset(value)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    print(list_error)
                    return None
                else:
                    list_observation_label.append(value)

            # generalized feature count in an observation
            row_indep_variable = row[0].split(',')
            if not self.count_features:
                self.count_features = len(row_indep_variable) - 1

            # iterate each remaining column in a given row: feature values
            for indep_index, value in enumerate(
                islice(row_indep_variable, 1, None)
            ):
                try:
                    validate = Validate_Dataset(value)
                    validate.validate_value()

                    list_error = validate.get_errors()
                    if list_error:
                        print(list_error)
                        return None
                    else:
                        value = float(value)
                except Exception as error:
                    print(error)
                    # NOTE(review): 'False' differs from the 'None' used
                    # on other error paths; preserved for compatibility
                    return False

                list_dataset.append({
                    'dep_variable_label': list_observation_label[dep_index],
                    'indep_variable_label': list_feature_label[indep_index],
                    'indep_variable_value': value
                })
    finally:
        # bugfix: the original closed the file only on the success path,
        # leaking the handle on every early error return
        self.svm_data.close()

    # save observation labels, and return
    self.observation_labels = list_observation_label
    return list_dataset
def xml_to_dict(self):
    """@xml_to_dict

    This method converts the supplied xml file-object to a python
    dictionary.

    @list_observation_label, is a list containing dependent variable
        labels.
    """

    list_dataset = []
    list_observation_label = []

    # convert xml file to python 'dict'
    dataset = xmltodict.parse(self.svm_data)

    # build 'list_dataset'
    for observation in dataset['dataset']['observation']:
        observation_label = observation['dependent-variable']

        # validate the dependent variable label
        validate = Validate_Dataset(observation_label)
        validate.validate_label()
        list_error = validate.get_errors()
        if list_error:
            print(list_error)
            return None
        else:
            list_observation_label.append(observation_label)

        for feature in observation['independent-variable']:
            feature_label = feature['label']
            feature_value = feature['value']

            validate_label = Validate_Dataset(feature_label)
            validate_value = Validate_Dataset(feature_value)
            validate_label.validate_label()
            validate_value.validate_value()

            # bugfix: read errors from the feature validators just run,
            # not from the already-checked observation-label validator
            list_error_label = validate_label.get_errors()
            list_error_value = validate_value.get_errors()
            if list_error_label or list_error_value:
                print(list_error_label)
                print(list_error_value)
                return None
            else:
                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': feature_label,
                    'indep_variable_value': feature_value
                })

        # generalized feature count in an observation
        if not self.count_features:
            self.count_features = len(observation['independent-variable'])

    # close file, save observation labels, and return
    self.svm_data.close()
    self.observation_labels = list_observation_label
    return list_dataset
def svm_csv_converter(raw_data):
    '''
    This method converts the supplied csv file-object, intended for an
    svm model, to a python dictionary.

    @raw_data, generally a file (or json string) containing the raw
        dataset(s), to be used when computing a corresponding model. If
        this argument is a file, it needs to be closed.

    @list_observation_label, is a list containing dependent variable
        labels.

    Note: we use the 'Universal Newline Support' with the 'U' parameter
          when opening 'raw_data'. This allows newlines to be understood
          regardless, if the newline character was created in osx,
          windows, or linux.

    Note: since 'row' is a list, with one comma-delimited string element,
          the following line is required in this method:

          row = row[0].split(',')
    '''

    feature_count = None
    list_dataset = []
    list_observation_label = []
    list_feature_label = []
    logger = Logger(__name__, 'error', 'error')

    try:
        # open temporary 'csvfile' reader object
        dataset_reader = csv.reader(
            raw_data, delimiter=' ', quotechar='|'
        )

        # iterate first row of csvfile: header of feature labels
        for row in islice(dataset_reader, 0, 1):

            # iterate each column in a given row (skip first column)
            row_indep_label = row[0].split(',')
            for value in islice(row_indep_label, 1, None):
                list_feature_label.append(str(value))

        # iterate all remaining rows of csvfile
        for dep_index, row in enumerate(islice(dataset_reader, 0, None)):

            # iterate first column of each row: observation label
            row_dep_label = row[0].split(',')
            for value in row_dep_label[:1]:
                list_observation_label.append(str(value))

            # generalized feature count in an observation
            # bugfix: compare against None so a legitimate count of 0 is
            # not recomputed on every row
            row_indep_variable = row[0].split(',')
            if feature_count is None:
                feature_count = len(row_indep_variable) - 1

            # iterate each remaining column in a given row: feature values
            for indep_index, value in enumerate(
                islice(row_indep_variable, 1, None)
            ):
                try:
                    validate = Validate_Dataset(value)
                    validate.validate_value()

                    list_error = validate.get_errors()
                    if list_error:
                        logger.log(list_error)
                        return None
                    else:
                        value = float(value)
                except Exception as error:
                    logger.log(error)
                    # NOTE(review): 'False' differs from the 'None' used on
                    # the other error path; preserved for compatibility
                    return False

                list_dataset.append({
                    'dep_variable_label': list_observation_label[dep_index],
                    'indep_variable_label': list_feature_label[indep_index],
                    'indep_variable_value': value
                })
    finally:
        # bugfix: the original leaked the file handle on the early error
        # returns; close on every exit path (guarded, since raw_data may
        # be a string per the docstring)
        if hasattr(raw_data, 'close'):
            raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
def csv_to_dict(self):
    '''
    Convert the supplied csv file-object ('self.svm_file') to a list of
    dictionaries, one per (observation, feature) pairing.

    Side effects: sets 'self.count_features' and 'self.observation_labels',
    and closes 'self.svm_file'. Returns None on a validation failure, and
    False when a feature value cannot be converted to float.
    '''

    list_dataset = []
    observation_label = []
    indep_variable_label = []

    try:
        # open temporary 'csvfile' reader object
        dataset_reader = csv.reader(
            self.svm_file, delimiter=' ', quotechar='|'
        )

        # iterate first row of csvfile: header of feature labels
        for row in islice(dataset_reader, 0, 1):

            # iterate each column in a given row (skip first column)
            row_indep_label = row[0].split(',')
            for value in islice(row_indep_label, 1, None):
                validate = Validate_Dataset(value)
                validate.validate_label()
                list_error = validate.get_errors()
                if list_error:
                    print(list_error)
                    return None
                else:
                    indep_variable_label.append(value)

        # iterate all remaining rows of csvfile
        for dep_index, row in enumerate(islice(dataset_reader, 0, None)):

            # iterate first column of each row: observation label
            row_dep_label = row[0].split(',')
            for value in row_dep_label[:1]:
                validate = Validate_Dataset(value)
                validate.validate_label()
                list_error = validate.get_errors()
                if list_error:
                    print(list_error)
                    return None
                else:
                    observation_label.append(value)

            # generalized feature count in an observation
            row_indep_variable = row[0].split(',')
            if not self.count_features:
                self.count_features = len(row_indep_variable) - 1

            # iterate each remaining column in a given row: feature values
            for indep_index, value in enumerate(
                islice(row_indep_variable, 1, None)
            ):
                try:
                    validate = Validate_Dataset(value)
                    validate.validate_value()
                    list_error = validate.get_errors()
                    if list_error:
                        print(list_error)
                        return None
                    else:
                        value = float(value)
                except Exception as error:
                    print(error)
                    return False

                list_dataset.append({
                    'dep_variable_label': observation_label[dep_index],
                    'indep_variable_label': indep_variable_label[indep_index],
                    'indep_variable_value': value
                })
    finally:
        # bugfix: the original closed the file only on the success path,
        # leaking the handle on every early error return
        self.svm_file.close()

    # save observation labels, and return
    self.observation_labels = observation_label
    return list_dataset
def svm_json_converter(raw_data, is_json):
    '''
    This method converts the supplied json file-object to a python
    dictionary.

    @raw_data, generally a file (or json string) containing the raw
        dataset(s), to be used when computing a corresponding model. If
        this argument is a file, it needs to be closed.

    @is_json, flag indicating 'raw_data' is a json string.

    @observation_labels, is a list containing dependent variable labels.
    '''

    # local variables
    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    def _ingest(dep_label, features):
        # validate each feature value in 'features'; valid entries are
        # appended to 'list_dataset', invalid ones are logged and skipped
        for feature_name, feature_val in features.items():
            checker = Validate_Dataset(feature_val)
            checker.validate_value()
            if checker.get_errors():
                logger.log(checker.get_errors())
            else:
                list_dataset.append({
                    'dep_variable_label': str(dep_label),
                    'indep_variable_label': str(feature_name),
                    'indep_variable_value': feature_val
                })

    if not is_json:
        # web-interface: parse the file-object into a dict keyed by the
        # dependent variable label
        dataset = json.load(raw_data)

        for observation_label in dataset:
            observations = dataset[observation_label]

            # dependent variable with single observation
            if type(observations) == dict:
                _ingest(observation_label, observations)

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(observations)

            # dependent variable with multiple observations
            elif type(observations) == list:
                for single_observation in observations:
                    _ingest(observation_label, single_observation)

                    # generalized feature count in an observation
                    if not feature_count:
                        feature_count = len(single_observation)

            # list of observation label
            observation_labels.append(observation_label)

    else:
        # programmatic-interface: raw_data is a (label, features) pair
        observation_label = raw_data[0]

        # list of observation label
        observation_labels.append(observation_label)

        # dependent variable with single observation
        if type(raw_data[1]) == dict:
            _ingest(observation_label, raw_data[1])

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(raw_data[1])

        # dependent variable with multiple observations
        if type(raw_data[1]) == list:
            for feature_set in raw_data[1]:
                _ingest(observation_label, feature_set)

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(feature_set)

    # close file
    if not is_json:
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
def svr_json_converter(raw_data, is_json):
    '''
    This method converts the supplied json file-object to a python
    dictionary.

    @raw_data, generally a file (or json string) containing the raw
        dataset(s), to be used when computing a corresponding model. If
        this argument is a file, it needs to be closed.

    @is_json, flag indicating 'raw_data' is a json string.

    @observation_labels, is a list containing dependent variable labels.
    '''

    # local variables
    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    # both interfaces produce a dict keyed by criterion: the web-interface
    # supplies a file-object to parse, the programmatic-interface supplies
    # the dict directly
    if is_json:
        dataset = raw_data
    else:
        dataset = json.load(raw_data)

    for criterion, predictors in dataset.items():
        # list of observation label
        observation_labels.append(criterion)

        # criterion with a single observation
        if type(predictors) == dict:
            for label, predictor in predictors.items():
                # validation: predictor value
                checker = Validate_Dataset(predictor)
                checker.validate_value()
                errors = checker.get_errors()

                if errors:
                    logger.log(errors)
                else:
                    # restructured data
                    list_dataset.append({
                        'dep_variable_label': str(criterion),
                        'indep_variable_label': str(label),
                        'indep_variable_value': predictor
                    })

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(predictors)

        # criterion with multiple observations
        if type(predictors) == list:
            for predictor_set in predictors:
                for label, predictor in predictor_set.items():
                    # validation: predictor value
                    checker = Validate_Dataset(predictor)
                    checker.validate_value()
                    errors = checker.get_errors()

                    if errors:
                        logger.log(errors)
                    else:
                        # restructured data
                        list_dataset.append({
                            'dep_variable_label': str(criterion),
                            'indep_variable_label': str(label),
                            'indep_variable_value': predictor
                        })

                    # generalized feature count in an observation
                    if not feature_count:
                        feature_count = len(predictor_set)

    # close file
    if not is_json:
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
def svr_json_converter(raw_data, is_json):
    '''@svr_json_converter

    This method converts the supplied json file-object to a python
    dictionary.

    @raw_data, generally a file (or json string) containing the raw
        dataset(s), to be used when computing a corresponding model. If
        this argument is a file, it needs to be closed.

    @is_json, flag indicating 'raw_data' is a json string.

    @observation_labels, is a list containing dependent variable labels.
    '''

    # local variables
    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    if not is_json:
        # web-interface: parse the posted file-object
        dataset = json.load(raw_data)

        for criterion, predictors in dataset.items():
            observation_label = criterion

            # list of observation label
            observation_labels.append(criterion)

            # criterion with a single observation
            if type(predictors) == dict:
                for label, predictor in predictors.items():
                    # validation: predictor value (string form)
                    checker = Validate_Dataset(str(predictor))
                    checker.validate_value()
                    errors = checker.get_errors()

                    if errors:
                        logger.log(errors)
                    else:
                        # restructured data
                        list_dataset.append({
                            'dep_variable_label': observation_label,
                            'indep_variable_label': str(label),
                            'indep_variable_value': predictor
                        })

                    # generalized feature count in an observation
                    if not feature_count:
                        feature_count = len(predictors)

            # criterion with multiple observations
            if type(predictors) == list:
                for predictor_set in predictors:
                    for label, predictor in predictor_set.items():
                        # validation: predictor value
                        checker = Validate_Dataset(predictor)
                        checker.validate_value()
                        errors = checker.get_errors()

                        if errors:
                            logger.log(errors)
                        else:
                            # restructured data
                            list_dataset.append({
                                'dep_variable_label': str(observation_label),
                                'indep_variable_label': str(label),
                                'indep_variable_value': predictor
                            })

                        # generalized feature count in an observation
                        if not feature_count:
                            feature_count = len(predictor_set)

    else:
        # programmatic-interface: raw_data is already a dict
        dataset = raw_data

        for criterion, predictors in dataset.items():
            # list of observation label
            observation_labels.append(criterion)

            # criterion with a single observation
            if type(predictors) == dict:
                for label, predictor in predictors.items():
                    # validation: predictor value
                    checker = Validate_Dataset(predictor)
                    checker.validate_value()
                    errors = checker.get_errors()

                    if errors:
                        logger.log(errors)
                    else:
                        # restructured data
                        list_dataset.append({
                            'dep_variable_label': str(criterion),
                            'indep_variable_label': str(label),
                            'indep_variable_value': predictor
                        })

                    # generalized feature count in an observation
                    if not feature_count:
                        feature_count = len(predictors)

            # criterion with multiple observations
            if type(predictors) == list:
                for predictor_set in predictors:
                    for label, predictor in predictor_set.items():
                        # validation: predictor value
                        checker = Validate_Dataset(predictor)
                        checker.validate_value()
                        errors = checker.get_errors()

                        if errors:
                            logger.log(errors)
                        else:
                            # restructured data
                            list_dataset.append({
                                'dep_variable_label': str(criterion),
                                'indep_variable_label': str(label),
                                'indep_variable_value': predictor
                            })

                        # generalized feature count in an observation
                        if not feature_count:
                            feature_count = len(predictor_set)

    # close file
    if not is_json:
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
def csv_to_dict(self):
    """@csv_to_dict

    This method converts the supplied csv file-object to a python
    dictionary.

    @list_observation_label, is a list containing dependent variable
        labels.

    Note: we use the 'Universal Newline Support' with the 'U' parameter
          when opening 'self.svm_data'. This allows newlines to be
          understood regardless, if the newline character was created in
          osx, windows, or linux.

    Note: since 'row' is a list, with one comma-delimited string element,
          the following line is required in this method:

          row = row[0].split(',')
    """

    list_dataset = []
    list_observation_label = []
    list_feature_label = []

    try:
        # open temporary 'csvfile' reader object
        dataset_reader = csv.reader(
            self.svm_data, delimiter=' ', quotechar='|'
        )

        # iterate first row of csvfile: header of feature labels
        for row in islice(dataset_reader, 0, 1):

            # iterate each column in a given row (skip first column)
            row_indep_label = row[0].split(',')
            for value in islice(row_indep_label, 1, None):
                validate = Validate_Dataset(value)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    print(list_error)
                    return None
                else:
                    list_feature_label.append(value)

        # iterate all remaining rows of csvfile
        for dep_index, row in enumerate(islice(dataset_reader, 0, None)):

            # iterate first column of each row: observation label
            row_dep_label = row[0].split(',')
            for value in row_dep_label[:1]:
                validate = Validate_Dataset(value)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    print(list_error)
                    return None
                else:
                    list_observation_label.append(value)

            # generalized feature count in an observation
            row_indep_variable = row[0].split(',')
            if not self.count_features:
                self.count_features = len(row_indep_variable) - 1

            # iterate each remaining column in a given row: feature values
            for indep_index, value in enumerate(
                islice(row_indep_variable, 1, None)
            ):
                try:
                    validate = Validate_Dataset(value)
                    validate.validate_value()

                    list_error = validate.get_errors()
                    if list_error:
                        print(list_error)
                        return None
                    else:
                        value = float(value)
                except Exception as error:
                    print(error)
                    # NOTE(review): 'False' differs from the 'None' used
                    # on other error paths; preserved for compatibility
                    return False

                list_dataset.append({
                    'dep_variable_label': list_observation_label[dep_index],
                    'indep_variable_label': list_feature_label[indep_index],
                    'indep_variable_value': value
                })
    finally:
        # bugfix: the original closed the file only on the success path,
        # leaking the handle on every early error return
        self.svm_data.close()

    # save observation labels, and return
    self.observation_labels = list_observation_label
    return list_dataset