def xml_to_dict(self):
    '''@xml_to_dict

    This method converts the supplied xml file-object ('self.svm_file') to a
    list of python dictionaries, one dictionary per feature of each
    observation, with the keys 'dep_variable_label', 'indep_variable_label',
    and 'indep_variable_value'.

    On success, 'self.observation_labels' is set to the list of dependent
    variable labels, 'self.count_features' is set (when not already truthy)
    to the feature count of the first observation, and the list is returned.

    If any label, or value fails validation, the reported errors are printed,
    and None is returned.

    Note: 'self.svm_file' is closed on every exit path, including the early
          returns, and any exception raised while parsing.

    '''

    list_dataset = []
    observation_label = []

    try:
        # convert xml file to python 'dict'
        dataset = xmltodict.parse(self.svm_file)

        # xmltodict returns a single mapping (not a list) for an element that
        # occurs only once, so normalize before iterating
        observations = dataset['dataset']['observation']
        if not isinstance(observations, list):
            observations = [observations]

        # build 'list_dataset'
        for dep_variable in observations:
            dep_variable_label = dep_variable['dependent-variable']

            validate = Validate_Dataset(dep_variable_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                # parenthesized, so the line is valid on python 2, and 3
                print(list_error)
                return None
            observation_label.append(dep_variable_label)

            # same single-element normalization as above
            features = dep_variable['independent-variable']
            if not isinstance(features, list):
                features = [features]

            for indep_variable in features:
                indep_variable_label = indep_variable['label']
                indep_variable_value = indep_variable['value']

                validate_label = Validate_Dataset(indep_variable_label)
                validate_value = Validate_Dataset(indep_variable_value)
                validate_label.validate_label()
                validate_value.validate_value()

                # errors must be read from the feature validators, not from
                # the observation label validator ('validate')
                list_error_label = validate_label.get_errors()
                list_error_value = validate_value.get_errors()
                if list_error_label or list_error_value:
                    print(list_error_label)
                    print(list_error_value)
                    return None

                list_dataset.append({
                    'dep_variable_label': dep_variable_label,
                    'indep_variable_label': indep_variable_label,
                    'indep_variable_value': indep_variable_value
                })

            # generalized feature count in an observation
            if not self.count_features:
                self.count_features = len(features)
    finally:
        # close file, regardless of how the conversion ended
        self.svm_file.close()

    # save observation labels, and return
    self.observation_labels = observation_label
    return list_dataset
def svm_csv_converter(raw_data):
    '''@svm_csv_converter

    This method converts the supplied csv file-object, intended for an svm
    model, to a python dictionary.

    @raw_data, a file-object containing the raw dataset, to be used when
        computing a corresponding model. It is iterated by 'csv.reader', and
        is closed by this method on every exit path.

    @list_observation_label, is a list containing dependent variable labels.

    The first row supplies the feature labels (every column after the first).
    Each following row supplies an observation label (first column), followed
    by its feature values, which are converted to 'float'.

    Returns a dictionary with the keys 'dataset', 'observation_labels', and
    'feature_count'. Returns None when a label, or value fails validation,
    and False when an exception is raised while processing a value (this
    includes a row having more values than the header has labels). In both
    failure cases the problem is sent to the logger.

    Note: this method does not open 'raw_data'. Presumably the caller opens
          it with 'Universal Newline Support' (the 'U' parameter), so that
          newlines are understood regardless, if the newline character was
          created in osx, windows, or linux -- verify against the caller.

    Note: since 'row' is a list, with one comma-delimited string element,
          each row is split with:

              row[0].split(',')

    '''

    feature_count = None
    list_dataset = []
    list_observation_label = []
    list_feature_label = []
    logger = Logger(__name__, 'error', 'error')

    try:
        # open temporary 'csvfile' reader object
        dataset_reader = csv.reader(raw_data, delimiter=' ', quotechar='|')

        # first row of csvfile: feature labels (skip the first column)
        header = next(dataset_reader, None)
        if header:
            for value in header[0].split(',')[1:]:
                validate = Validate_Dataset(value)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    logger.log(list_error)
                    return None
                list_feature_label.append(value)

        # iterate all remaining rows of csvfile
        for row in dataset_reader:
            # 'csv.reader' yields an empty list for a blank line
            if not row:
                continue

            row_columns = row[0].split(',')

            # first column of each row is the observation label
            observation_label = row_columns[0]
            validate = Validate_Dataset(observation_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                logger.log(list_error)
                return None
            list_observation_label.append(observation_label)

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(row_columns) - 1

            # iterate each remaining column in a given row
            for indep_index, value in enumerate(row_columns[1:]):
                try:
                    validate = Validate_Dataset(value)
                    validate.validate_value()

                    list_error = validate.get_errors()
                    if list_error:
                        logger.log(list_error)
                        return None
                    value = float(value)

                    # looked up inside 'try', so a row wider than the header
                    # is logged, instead of raising an uncaught IndexError
                    feature_label = list_feature_label[indep_index]
                except Exception as error:
                    logger.log(error)
                    return False

                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': feature_label,
                    'indep_variable_value': value
                })
    finally:
        # close file, regardless of how the conversion ended
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
def svm_xml_converter(raw_data):
    '''@svm_xml_converter

    This method converts the supplied xml file-object to a python
    dictionary.

    @raw_data, a file-object containing the raw dataset, to be used when
        computing a corresponding model. It is handed to 'xmltodict.parse',
        and is closed by this method on every exit path.

    @list_observation_label, is a list containing dependent variable labels.

    Returns a dictionary with the keys 'dataset', 'observation_labels', and
    'feature_count', where 'feature_count' is taken from the first
    observation. Returns None, after logging the errors, when any label, or
    value fails validation.

    '''

    feature_count = None
    list_dataset = []
    list_observation_label = []
    logger = Logger(__name__, 'error', 'error')

    try:
        # convert xml file to python 'dict'
        dataset = xmltodict.parse(raw_data)

        # xmltodict returns a single mapping (not a list) for an element that
        # occurs only once, so normalize before iterating
        observations = dataset['dataset']['observation']
        if not isinstance(observations, list):
            observations = [observations]

        # build 'list_dataset'
        for observation in observations:
            observation_label = observation['dependent-variable']

            validate = Validate_Dataset(observation_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                logger.log(list_error)
                return None
            list_observation_label.append(observation_label)

            # same single-element normalization as above
            features = observation['independent-variable']
            if not isinstance(features, list):
                features = [features]

            for feature in features:
                feature_label = feature['label']
                feature_value = feature['value']

                validate_label = Validate_Dataset(feature_label)
                validate_value = Validate_Dataset(feature_value)
                validate_label.validate_label()
                validate_value.validate_value()

                # errors must be read from the feature validators, not from
                # the observation label validator ('validate')
                list_error_label = validate_label.get_errors()
                list_error_value = validate_value.get_errors()
                if list_error_label or list_error_value:
                    logger.log(list_error_label)
                    logger.log(list_error_value)
                    return None

                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': feature_label,
                    'indep_variable_value': feature_value
                })

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(features)
    finally:
        # close file, regardless of how the conversion ended
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
def svm_json_converter(raw_data, is_json):
    '''@svm_json_converter

    This method converts the supplied json dataset to a python dictionary.

    @raw_data, the raw dataset(s), to be used when computing a corresponding
        model. When 'is_json' is falsy, this is a file-object, which is
        decoded with 'json.load', and closed by this method on every exit
        path. Otherwise, it is used directly as the dataset.

    @is_json, flag indicating 'raw_data' does not need to be loaded from a
        file. Note that 'raw_data' is then indexed like a mapping of
        observation label to observation(s), so it must already be decoded.

    @observation_labels, is a list containing dependent variable labels.

    Each observation label maps either to a dictionary (a single
    observation), or to a list of dictionaries (multiple observations),
    where each dictionary maps a feature label to a feature value. Any other
    type contributes no features.

    Returns a dictionary with the keys 'dataset', 'observation_labels', and
    'feature_count'. Returns None, after logging every reported error, when
    an observation label, feature label, or feature value fails validation.

    '''

    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    try:
        if is_json:
            dataset = raw_data
        else:
            dataset = json.load(raw_data)

        for observation_label in dataset:
            # variables
            observations = dataset[observation_label]
            list_error = []

            # validation (part 1)
            validate_olabel = Validate_Dataset(observation_label)
            validate_olabel.validate_label()
            list_error.append(validate_olabel.get_errors())

            # a dictionary is a single observation, while a list contains
            # multiple observations: treat both uniformly
            if isinstance(observations, dict):
                observations = [observations]
            elif not isinstance(observations, list):
                observations = []

            for observation in observations:
                for feature_label, feature_value in observation.items():
                    # validation (part 2): errors are collected for every
                    # feature, not only for the last one iterated
                    validate_flabel = Validate_Dataset(feature_label)
                    validate_flabel.validate_label()
                    list_error.append(validate_flabel.get_errors())

                    validate_fvalue = Validate_Dataset(feature_value)
                    validate_fvalue.validate_value()
                    list_error.append(validate_fvalue.get_errors())

                    # restructured data
                    list_dataset.append({
                        'dep_variable_label': observation_label,
                        'indep_variable_label': feature_label,
                        'indep_variable_value': feature_value
                    })

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(observation)

            # list of observation label
            observation_labels.append(observation_label)

            # check for errors: log each one, and reject the dataset if any
            list_error = [error for error in list_error if error]
            for error in list_error:
                logger.log(error)
            if list_error:
                return None
    finally:
        # close file, regardless of how the conversion ended
        if not is_json:
            raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
def csv_to_dict(self):
    """@csv_to_dict

    This method converts the supplied csv file-object ('self.svm_data') to a
    list of python dictionaries, one dictionary per feature value, with the
    keys 'dep_variable_label', 'indep_variable_label', and
    'indep_variable_value'.

    @list_observation_label, is a list containing dependent variable labels.

    The first row supplies the feature labels (every column after the first).
    Each following row supplies an observation label (first column), followed
    by its feature values, which are converted to 'float'.

    On success, 'self.observation_labels' is set, 'self.count_features' is
    set (when not already truthy) from the first data row, and the list is
    returned. Returns None when a label, or value fails validation, and False
    when an exception is raised while processing a value (this includes a row
    having more values than the header has labels). In both failure cases the
    problem is printed.

    Note: this method does not open 'self.svm_data'. Presumably it was opened
          with 'Universal Newline Support' (the 'U' parameter), so that
          newlines are understood regardless, if the newline character was
          created in osx, windows, or linux -- verify against the caller.

    Note: since 'row' is a list, with one comma-delimited string element,
          each row is split with:

              row[0].split(',')

    Note: 'self.svm_data' is closed on every exit path.

    """

    list_dataset = []
    list_observation_label = []
    list_feature_label = []

    try:
        # open temporary 'csvfile' reader object
        dataset_reader = csv.reader(
            self.svm_data,
            delimiter=' ',
            quotechar='|'
        )

        # first row of csvfile: feature labels (skip the first column)
        header = next(dataset_reader, None)
        if header:
            for value in header[0].split(',')[1:]:
                validate = Validate_Dataset(value)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    # parenthesized, so the line is valid on python 2, and 3
                    print(list_error)
                    return None
                list_feature_label.append(value)

        # iterate all remaining rows of csvfile
        for row in dataset_reader:
            # 'csv.reader' yields an empty list for a blank line
            if not row:
                continue

            row_columns = row[0].split(',')

            # first column of each row is the observation label
            observation_label = row_columns[0]
            validate = Validate_Dataset(observation_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                print(list_error)
                return None
            list_observation_label.append(observation_label)

            # generalized feature count in an observation
            if not self.count_features:
                self.count_features = len(row_columns) - 1

            # iterate each remaining column in a given row
            for indep_index, value in enumerate(row_columns[1:]):
                try:
                    validate = Validate_Dataset(value)
                    validate.validate_value()

                    list_error = validate.get_errors()
                    if list_error:
                        print(list_error)
                        return None
                    value = float(value)

                    # looked up inside 'try', so a row wider than the header
                    # is reported, instead of raising an uncaught IndexError
                    feature_label = list_feature_label[indep_index]
                except Exception as error:
                    print(error)
                    return False

                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': feature_label,
                    'indep_variable_value': value
                })
    finally:
        # close file, regardless of how the conversion ended
        self.svm_data.close()

    # save observation labels, and return
    self.observation_labels = list_observation_label
    return list_dataset
def xml_to_dict(self):
    """@xml_to_dict

    This method converts the supplied xml file-object ('self.svm_data') to a
    list of python dictionaries, one dictionary per feature of each
    observation, with the keys 'dep_variable_label', 'indep_variable_label',
    and 'indep_variable_value'.

    @list_observation_label, is a list containing dependent variable labels.

    On success, 'self.observation_labels' is set, 'self.count_features' is
    set (when not already truthy) to the feature count of the first
    observation, and the list is returned.

    If any label, or value fails validation, the reported errors are printed,
    and None is returned.

    Note: 'self.svm_data' is closed on every exit path, including the early
          returns, and any exception raised while parsing.

    """

    list_dataset = []
    list_observation_label = []

    try:
        # convert xml file to python 'dict'
        dataset = xmltodict.parse(self.svm_data)

        # xmltodict returns a single mapping (not a list) for an element that
        # occurs only once, so normalize before iterating
        observations = dataset['dataset']['observation']
        if not isinstance(observations, list):
            observations = [observations]

        # build 'list_dataset'
        for observation in observations:
            observation_label = observation['dependent-variable']

            validate = Validate_Dataset(observation_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                # parenthesized, so the line is valid on python 2, and 3
                print(list_error)
                return None
            list_observation_label.append(observation_label)

            # same single-element normalization as above
            features = observation['independent-variable']
            if not isinstance(features, list):
                features = [features]

            for feature in features:
                feature_label = feature['label']
                feature_value = feature['value']

                validate_label = Validate_Dataset(feature_label)
                validate_value = Validate_Dataset(feature_value)
                validate_label.validate_label()
                validate_value.validate_value()

                # errors must be read from the feature validators, not from
                # the observation label validator ('validate')
                list_error_label = validate_label.get_errors()
                list_error_value = validate_value.get_errors()
                if list_error_label or list_error_value:
                    print(list_error_label)
                    print(list_error_value)
                    return None

                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': feature_label,
                    'indep_variable_value': feature_value
                })

            # generalized feature count in an observation
            if not self.count_features:
                self.count_features = len(features)
    finally:
        # close file, regardless of how the conversion ended
        self.svm_data.close()

    # save observation labels, and return
    self.observation_labels = list_observation_label
    return list_dataset
def svm_xml_converter(raw_data):
    '''@svm_xml_converter

    This method converts the supplied xml file-object to a python
    dictionary.

    @raw_data, a file-object containing the raw dataset, to be used when
        computing a corresponding model. It is handed to 'xmltodict.parse',
        and is closed by this method on every exit path.

    @list_observation_label, is a list containing dependent variable labels.

    Returns a dictionary with the keys 'dataset', 'observation_labels', and
    'feature_count', where 'feature_count' is taken from the first
    observation. Returns None, after logging the errors, when any label, or
    value fails validation.

    '''

    feature_count = None
    list_dataset = []
    list_observation_label = []
    logger = Logger(__name__, 'error', 'error')

    try:
        # convert xml file to python 'dict'
        dataset = xmltodict.parse(raw_data)

        # a lone <observation> is parsed as one mapping, rather than a list
        # of mappings, so wrap it to keep the iteration below uniform
        observations = dataset['dataset']['observation']
        if not isinstance(observations, list):
            observations = [observations]

        # build 'list_dataset'
        for observation in observations:
            observation_label = observation['dependent-variable']

            validate = Validate_Dataset(observation_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                logger.log(list_error)
                return None
            list_observation_label.append(observation_label)

            # a lone <independent-variable> needs the same wrapping
            features = observation['independent-variable']
            if not isinstance(features, list):
                features = [features]

            for feature in features:
                feature_label = feature['label']
                feature_value = feature['value']

                validate_label = Validate_Dataset(feature_label)
                validate_value = Validate_Dataset(feature_value)
                validate_label.validate_label()
                validate_value.validate_value()

                # query the validators built for this feature; 'validate'
                # only holds the observation label result
                list_error_label = validate_label.get_errors()
                list_error_value = validate_value.get_errors()
                if list_error_label or list_error_value:
                    logger.log(list_error_label)
                    logger.log(list_error_value)
                    return None

                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': feature_label,
                    'indep_variable_value': feature_value
                })

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(features)
    finally:
        # close file, regardless of how the conversion ended
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
def csv_to_dict(self):
    '''@csv_to_dict

    This method converts the supplied csv file-object ('self.svm_file') to a
    list of python dictionaries, one dictionary per feature value, with the
    keys 'dep_variable_label', 'indep_variable_label', and
    'indep_variable_value'.

    The first row supplies the feature labels (every column after the first).
    Each following row supplies an observation label (first column), followed
    by its feature values, which are converted to 'float'.

    On success, 'self.observation_labels' is set, 'self.count_features' is
    set (when not already truthy) from the first data row, and the list is
    returned. Returns None when a label, or value fails validation, and False
    when an exception is raised while processing a value (this includes a row
    having more values than the header has labels). In both failure cases the
    problem is printed.

    Note: since 'row' is a list, with one comma-delimited string element,
          each row is split with:

              row[0].split(',')

    Note: 'self.svm_file' is closed on every exit path.

    '''

    list_dataset = []
    observation_label = []
    indep_variable_label = []

    try:
        # open temporary 'csvfile' reader object
        dataset_reader = csv.reader(
            self.svm_file,
            delimiter=' ',
            quotechar='|'
        )

        # first row of csvfile: feature labels (skip the first column)
        header = next(dataset_reader, None)
        if header:
            for value in header[0].split(',')[1:]:
                validate = Validate_Dataset(value)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    # parenthesized, so the line is valid on python 2, and 3
                    print(list_error)
                    return None
                indep_variable_label.append(value)

        # iterate all remaining rows of csvfile
        for row in dataset_reader:
            # 'csv.reader' yields an empty list for a blank line
            if not row:
                continue

            row_columns = row[0].split(',')

            # first column of each row is the observation label
            dep_label = row_columns[0]
            validate = Validate_Dataset(dep_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                print(list_error)
                return None
            observation_label.append(dep_label)

            # generalized feature count in an observation
            if not self.count_features:
                self.count_features = len(row_columns) - 1

            # iterate each remaining column in a given row
            for indep_index, value in enumerate(row_columns[1:]):
                try:
                    validate = Validate_Dataset(value)
                    validate.validate_value()

                    list_error = validate.get_errors()
                    if list_error:
                        print(list_error)
                        return None
                    value = float(value)

                    # looked up inside 'try', so a row wider than the header
                    # is reported, instead of raising an uncaught IndexError
                    indep_label = indep_variable_label[indep_index]
                except Exception as error:
                    print(error)
                    return False

                list_dataset.append({
                    'dep_variable_label': dep_label,
                    'indep_variable_label': indep_label,
                    'indep_variable_value': value
                })
    finally:
        # close file, regardless of how the conversion ended
        self.svm_file.close()

    # save observation labels, and return
    self.observation_labels = observation_label
    return list_dataset
def csv_to_dict(self):
    """@csv_to_dict

    This method converts the supplied csv file-object ('self.svm_data') to a
    list of python dictionaries. Every feature value of every observation
    becomes one dictionary, with the keys 'dep_variable_label',
    'indep_variable_label', and 'indep_variable_value'.

    @list_observation_label, is a list containing dependent variable labels.

    Layout expected by this method: the header row carries the feature labels
    in every column after the first, and each later row carries an
    observation label in its first column, followed by feature values, which
    are converted to 'float'.

    Outcomes:

        - success: 'self.observation_labels' is assigned,
          'self.count_features' is assigned from the first data row (unless
          it is already truthy), and the list is returned.
        - a label, or value fails validation: the errors are printed, and
          None is returned.
        - an exception is raised while processing a value (for example, a
          row with more values than the header has labels): the exception is
          printed, and False is returned.

    Note: this method does not open 'self.svm_data'. Presumably it was opened
          with 'Universal Newline Support' (the 'U' parameter), so that
          newlines are understood regardless, if the newline character was
          created in osx, windows, or linux -- verify against the caller.

    Note: since 'row' is a list, with one comma-delimited string element,
          each row is split with:

              row[0].split(',')

    Note: 'self.svm_data' is closed on every exit path.

    """

    list_dataset = []
    list_observation_label = []
    list_feature_label = []

    try:
        # open temporary 'csvfile' reader object
        dataset_reader = csv.reader(
            self.svm_data, delimiter=' ', quotechar='|')

        # header row: collect feature labels, ignoring the first column
        header = next(dataset_reader, None)
        if header:
            for label in header[0].split(',')[1:]:
                validate = Validate_Dataset(label)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    # parenthesized, so the line is valid on python 2, and 3
                    print(list_error)
                    return None
                list_feature_label.append(label)

        # data rows: one observation per row
        for row in dataset_reader:
            # blank lines are yielded by 'csv.reader' as empty lists
            if not row:
                continue

            columns = row[0].split(',')

            # observation label is the first column
            observation_label = columns[0]
            validate = Validate_Dataset(observation_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                print(list_error)
                return None
            list_observation_label.append(observation_label)

            # generalized feature count in an observation
            if not self.count_features:
                self.count_features = len(columns) - 1

            # feature values are the remaining columns
            for indep_index, value in enumerate(columns[1:]):
                try:
                    validate = Validate_Dataset(value)
                    validate.validate_value()

                    list_error = validate.get_errors()
                    if list_error:
                        print(list_error)
                        return None
                    value = float(value)

                    # kept inside 'try': a value without a matching header
                    # label is reported, rather than raising IndexError
                    feature_label = list_feature_label[indep_index]
                except Exception as error:
                    print(error)
                    return False

                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': feature_label,
                    'indep_variable_value': value
                })
    finally:
        # close file, regardless of how the conversion ended
        self.svm_data.close()

    # save observation labels, and return
    self.observation_labels = list_observation_label
    return list_dataset
def xml_to_dict(self):
    """@xml_to_dict

    This method converts the supplied xml file-object ('self.svm_data') to a
    list of python dictionaries. Every feature of every observation becomes
    one dictionary, with the keys 'dep_variable_label',
    'indep_variable_label', and 'indep_variable_value'.

    @list_observation_label, is a list containing dependent variable labels.

    Outcomes:

        - success: 'self.observation_labels' is assigned,
          'self.count_features' is assigned from the first observation
          (unless it is already truthy), and the list is returned.
        - a label, or value fails validation: the errors are printed, and
          None is returned.

    Note: 'self.svm_data' is closed on every exit path, including the early
          returns, and any exception raised while parsing.

    """

    list_dataset = []
    list_observation_label = []

    try:
        # convert xml file to python 'dict'
        dataset = xmltodict.parse(self.svm_data)

        # a lone <observation> is parsed as one mapping, rather than a list
        # of mappings, so wrap it to keep the iteration below uniform
        observations = dataset['dataset']['observation']
        if not isinstance(observations, list):
            observations = [observations]

        # build 'list_dataset'
        for observation in observations:
            observation_label = observation['dependent-variable']

            validate = Validate_Dataset(observation_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                # parenthesized, so the line is valid on python 2, and 3
                print(list_error)
                return None
            list_observation_label.append(observation_label)

            # a lone <independent-variable> needs the same wrapping
            features = observation['independent-variable']
            if not isinstance(features, list):
                features = [features]

            for feature in features:
                feature_label = feature['label']
                feature_value = feature['value']

                validate_label = Validate_Dataset(feature_label)
                validate_value = Validate_Dataset(feature_value)
                validate_label.validate_label()
                validate_value.validate_value()

                # query the validators built for this feature; 'validate'
                # only holds the observation label result
                list_error_label = validate_label.get_errors()
                list_error_value = validate_value.get_errors()
                if list_error_label or list_error_value:
                    print(list_error_label)
                    print(list_error_value)
                    return None

                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': feature_label,
                    'indep_variable_value': feature_value
                })

            # generalized feature count in an observation
            if not self.count_features:
                self.count_features = len(features)
    finally:
        # close file, regardless of how the conversion ended
        self.svm_data.close()

    # save observation labels, and return
    self.observation_labels = list_observation_label
    return list_dataset