def exec(self, task_input, task_output):
    # Prepare every file in the Apple Health ECG export, one worker thread per file.
    path = task_output["res_loc"]
    path = os.path_join(path, "apple_health_export", "electrocardiograms")
    resources = os.dir_res_list(path)
    workers = []
    for elem in resources:
        res_path = os.path_join(path, elem)
        if os.is_path_file(res_path):
            splitted_elem = elem.split('.')
            backup_name = f"{splitted_elem[0]}.bak"
            worker = DataPreparerWorker(res_path, backup_name)
            worker.start()
            workers.append(worker)
    for worker in workers:
        worker.join()
    task_output["res_loc"] = path

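# NOTE (assumption): throughout these listings `os` is not the standard-library module
# but a project-specific file-utility wrapper (os.path_join, os.dir_res_list,
# os.is_path_file, os.is_dir, os.copy_file, os.remove_file, os.remove_dir,
# os.get_path_seperator, os.makedirs(path, True)); the wrapper itself is not shown in
# this section. A minimal sketch of what such a wrapper could look like on top of the
# standard library:
import os as std_os
import shutil

def path_join(*parts):
    return std_os.path.join(*parts)

def dir_res_list(path):
    return std_os.listdir(path)

def is_path_file(path):
    return std_os.path.isfile(path)

def is_dir(path):
    return std_os.path.isdir(path)

def get_path_seperator():
    return std_os.sep

def makedirs(path, exist_ok=False):
    std_os.makedirs(path, exist_ok=exist_ok)

def copy_file(src, copy_name):
    # copies src into the working directory under copy_name and returns the copy's path
    return shutil.copy(src, copy_name)

def remove_file(path):
    std_os.remove(path)

def remove_dir(path):
    shutil.rmtree(path)
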
def exec(self, task_input, task_output):
    labels_information_loc = task_input["labels_information_location"]
    resources_path = task_output["res_loc"]
    labels_info = jReader.parse_data(labels_information_loc)
    os.makedirs(task_input["training_loc"], True)
    resources = os.dir_res_list(resources_path)
    training_path = f"{task_input['training_loc']}{os.get_path_seperator()}training.csv"
    labels_path = f"{task_input['training_loc']}{os.get_path_seperator()}labels.csv"
    with open(training_path, 'w') as training_file:
        with open(labels_path, 'w') as labels_file:
            labels_file.write("Sinus, AF\n")
            index = 0
            while index < task_input["readings"]:
                training_file.write(f"x_{index}")
                index += 1
                if index < task_input["readings"]:
                    training_file.write(',')
            training_file.write("\n")
            self.prep_training_file(training_file, labels_file, labels_info,
                                    task_output["res_loc"], resources)

def _normalize_elements(self, res_elems, task_output):
    for res in res_elems:
        if "f1" not in res:
            continue
        data = loader.load_data(os.path_join(task_output["res_loc"], res))
        with open(os.path_join(task_output["res_loc"], res), 'w') as dfile:
            for elem in data.values:
                off_value = 0
                if elem[2] < 16000:
                    off_value = 15000
                elif elem[2] < 17000:
                    off_value = 16000
                elif elem[2] < 18000:
                    off_value = 17000
                elif elem[2] < 19000:
                    off_value = 18000
                else:
                    off_value = 19000
                temp_d_elem = elem[2] - off_value
                dfile.write(f"{elem[0]},{elem[1]},{temp_d_elem}\n")

def run(self):
    path_split = self.path.split(os.get_path_seperator())
    backup_name = path_split[len(path_split) - 1]
    backup_name = backup_name.split('.')[0]
    backup = os.copy_file(self.path, f"{backup_name}{self.backup_name_index}.bak")
    with open(self.path, 'w') as data_file:
        with open(backup) as backup_file:
            data_file.write(backup_file.readline())
            data_reading = backup_file.readline()
            index = 0
            while index < self.readings and data_reading != "":
                data_file.write(data_reading)
                data_reading = backup_file.readline()
                index += 1
    os.remove_file(backup)

def exec(self, task_input, task_output): task_output["readings"] = self.data_readings resources = os.dir_res_list(task_output["res_loc"]) splitter_threads = [] for elem in resources: res_path = os.path_join(task_output["res_loc"], elem) if not os.is_path_file(res_path): continue splitter = _Splitter(elem, res_path, self.data_readings) splitter.start() splitter_threads.append(splitter) if len(splitter_threads) > 5: for thread in splitter_threads: thread.join() splitter_threads = [] for thread in splitter_threads: thread.join()
def __init__(self, **kwargs):
    threading.Thread.__init__(self)
    self.fantasia = os.path_join(kwargs["training"], kwargs["fantasia"])
    self.nsr = os.path_join(kwargs["training"], kwargs["nsr"])
    self.ltaf = os.path_join(kwargs["training"], kwargs["ltaf"])
    self.path = kwargs["path"]
    self.concat_rate = kwargs["concat_rate"]

def _merge_training(self, training, backup):
    with open(backup) as backup_training:
        temp = backup_training.readline()
        while temp != "":
            training.write(temp)
            temp = backup_training.readline()
    os.remove_file(backup)

def _calc_labels_loc(self, path):
    splitted = path.split(os.get_path_seperator())
    label_path = f"{splitted[0]}{os.get_path_seperator()}{splitted[1]}"
    for path_elem in range(2, len(splitted) - 1):
        label_path = os.path_join(label_path, splitted[path_elem])
    path_name = f"{splitted[len(splitted) - 1][:-4]}_labels.csv"
    return path, os.path_join(label_path, path_name)

def exec(self, task_input, task_output):
    self._prep_dir(task_input["model_loc"])
    model_path = os.path_join(task_input["model_loc"], f'model-{task_input["inputs"]}')
    if not task_input["model_override"] and os.is_dir(model_path):
        # TODO implement a dynamic "path increment" model-name save functionality
        pass
    task_output["model"].save(model_path)

def exec(self, task_input, task_output):
    target_labels_loc = task_input["labels_loc"]
    # TODO - make sure folder exists
    os.makedirs(target_labels_loc, True)
    # TODO - join training folder with name of label file
    target_labels_loc = os.path_join(target_labels_loc, "labels.csv")
    data_labels_mappings = jReader.parse_data(task_input["data_labels_mappings"])
    self.create_labels_file(target_labels_loc, data_labels_mappings)

def exec(self, task_input, task_output):
    self.headders = task_input["headders"]
    res_loc = task_output["res_loc"]
    folder_res = op.dir_res_list(res_loc)
    for folder_res_elem in folder_res:
        target_res = op.path_join(res_loc, folder_res_elem)
        try:
            self._apply_headders(target_res)
        except Exception as e:
            print(e)

def get_min_readings(self, path):
    resources = os.dir_res_list(path)
    readings = []
    for elem in resources:
        readings.append(self.count(os.path_join(path, elem)))
    min_reading = readings[0]
    for elem in readings:
        if elem < min_reading:
            min_reading = elem
    return min_reading - 1

def _split(self, file, headers):
    temp_reading = file.readline()
    split_index = 1
    while temp_reading != "":
        index_at = 0
        readings = []
        # Collect the next self.readings lines and hand them to a save-thread.
        while index_at < self.readings:
            if temp_reading != "":
                readings.append(temp_reading)
                temp_reading = file.readline()
            index_at += 1
        split_path = os.path_join(self.containing_folder.split('.')[0],
                                  f"{self.readings}_ecg_split_{split_index}.csv")
        split_thread = _Save_split(readings, headers, split_path)
        split_thread.start()
        split_index += 1

def prep_training_file(self, training_writer, labels_writer, labels_info, resources_loc, resources):
    for label_info in labels_info["mappings"]:
        resource = self.get_resource(label_info["file_name"], resources)
        medical_condition = label_info["medical_condition"]
        # "Sinusrytme" / "Artrieflimren" are the Danish label values used in the
        # mappings file for sinus rhythm / atrial fibrillation.
        if resource is not None and (medical_condition == "Sinusrytme"
                                     or medical_condition == "Artrieflimren"):
            with open(os.path_join(resources_loc, resource)) as data_file:
                data_file.readline()
                reading = data_file.readline()
                while reading != "":
                    splitted_reading = reading.split("\n")
                    training_writer.write(splitted_reading[0])
                    reading = data_file.readline()
                    if reading != "":
                        training_writer.write(',')
                training_writer.write("\n")
            if medical_condition == "Sinusrytme":
                labels_writer.write("1,0\n")
            elif medical_condition == "Artrieflimren":
                labels_writer.write("0,1\n")

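# NOTE (assumption): prep_training_file above only depends on labels_info exposing a
# "mappings" list whose entries carry a "file_name" and a "medical_condition". The
# file read by jReader.parse_data is not shown in this section; an illustrative
# structure with hypothetical file names could look like this:
example_labels_info = {
    "mappings": [
        {"file_name": "ecg_2020-01-01.csv", "medical_condition": "Sinusrytme"},
        {"file_name": "ecg_2020-01-02.csv", "medical_condition": "Artrieflimren"},
    ]
}
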
def append_training_set(self, file, path, ressources):
    for res_elem in ressources:
        training_data = ""
        if not self._is_ecg(res_elem):
            continue
        res_path = os.path_join(path, res_elem)
        temp_data = loader.load_data(res_path)
        if self._is_missing_inputs(len(temp_data.values)):
            os.remove_file(res_path)
            continue
        training_data = self._append_data_set(training_data, temp_data)
        file.write(training_data)
        os.remove_file(res_path)

def _save_training_samples(self, rec_loc, indexes):
    copied_file = os.copy_file(rec_loc, "temp.bak")
    index = 0
    with open(copied_file) as training_copy:
        with open(rec_loc, 'w') as training:
            training.write(training_copy.readline())
            # Copy every row except the ones whose positions are listed in indexes.
            for elem in indexes:
                for _ in range(index, elem):
                    index += 1
                    training.write(training_copy.readline())
                training_copy.readline()
                index += 1
    os.remove_file(copied_file)

def _setup(self, task_output):
    self.training_readings = task_output['readings']
    ressources = os.dir_res_list(task_output["res_loc"])
    training_data = self._training_loc(task_output["res_loc"])
    task_output["training_loc"] = training_data
    backup = self._bakup_saved(training_data)
    return ressources, training_data, backup

def exec(self, task_input, task_output):
    # res_name = task_output["res_loc"].split("/")
    is_not_fantasia = task_output["res_loc"][-len("fantasia"):] != "fantasia"
    if is_not_fantasia:
        return
    ressources = os.dir_res_list(task_output["res_loc"])
    self._normalize_elements(ressources, task_output)

def run(self):
    file_to_backup = os.path_join(self.path, self.resource)
    backup_name = f"{self.resource.split('.')[0]}.bak"
    backup_path = os.copy_file(file_to_backup, backup_name)
    with open(os.path_join(self.path, self.resource), 'w') as datafile:
        with open(backup_path) as data:
            datafile.write(data.readline())
            temp = data.readline()
            while temp != "":
                normalized_reading = self.data_reading_normalizer(temp)
                datafile.write(normalized_reading)
                temp = data.readline()
    os.remove_file(backup_path)

def exec(self, task_input, task_output):
    min_readings = self.get_min_readings(task_output["res_loc"])
    index = 0
    resources = os.dir_res_list(task_output["res_loc"])
    workers = []
    for elem in resources:
        path = os.path_join(task_output["res_loc"], elem)
        worker = NormalizeReadingsCountWorker(path, min_readings, index)
        worker.start()
        workers.append(worker)
        index += 1
    for worker in workers:
        worker.join()

def _spawn_worker(self, task_input, name, *args):
    concat_train_path = os.path_join(task_input["training"], name)
    t = _Concatter(fantasia=args[0],
                   nsr=args[1],
                   ltaf=args[2],
                   training=task_input["training"],
                   path=concat_train_path,
                   concat_rate=task_input["concat_rate"])
    t.start()
    return t

def exec(self, task_input, task_output):
    resources = os.dir_res_list(task_output["res_loc"])
    workers = []
    for resource in resources:
        worker = DataNormalizer(task_output["res_loc"], resource)
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()

def run(self):
    backup_path = os.copy_file(self.res_path, self.backup_name)
    with open(self.res_path, 'w') as data_file:
        with open(backup_path) as backup:
            data = backup.read()
            splitted_data = data.split("\n")
            data_file.write("readings\n")
            index = 0
            for elem in splitted_data:
                try:
                    reading = self.get_reading_value(elem)
                    if reading is not None:
                        data_file.write(f"{reading}\n")
                        index += 1
                except:
                    pass
    os.remove_file(backup_path)

def run(self):
    fantasia_counter = self._count(self.fantasia)
    nsr_counter = self._count(self.nsr)
    ltaf_counter = self._count(self.ltaf)
    with open(self.path, 'w') as res_file:
        fantasia = open(self.fantasia)
        nsr = open(self.nsr)
        ltaf = open(self.ltaf)
        fantasia_temp = fantasia.readline()
        nsr_temp = nsr.readline()
        ltaf_temp = ltaf.readline()
        res_file.write(fantasia_temp)
        # Cap the LTAF contribution at the size of the two sinus-rhythm sets and
        # compute how many LTAF readings to pass over per kept reading.
        ltaf_read_rate = (ltaf_counter // (fantasia_counter + nsr_counter)) - 1
        ltaf_counter = fantasia_counter + nsr_counter
        # Interleave blocks from the three sources until each of them is exhausted.
        while fantasia_temp != "" and nsr_temp != "" and ltaf_temp != "":
            fantasia_temp = self._write_data(res_file, fantasia, fantasia_counter, self.concat_rate, False)
            nsr_temp = self._write_data(res_file, nsr, nsr_counter, self.concat_rate, False)
            ltaf_temp = self._write_data(res_file, ltaf, ltaf_counter, self.concat_rate, True, ltaf_read_rate)
        while fantasia_temp != "" and ltaf_temp != "":
            fantasia_temp = self._write_data(res_file, fantasia, fantasia_counter, self.concat_rate, False)
            ltaf_temp = self._write_data(res_file, ltaf, ltaf_counter, self.concat_rate, True, ltaf_read_rate)
        while nsr_temp != "" and ltaf_temp != "":
            nsr_temp = self._write_data(res_file, nsr, nsr_counter, self.concat_rate, False)
            ltaf_temp = self._write_data(res_file, ltaf, ltaf_counter, self.concat_rate, True, ltaf_read_rate)
        fantasia.close()
        nsr.close()
        ltaf.close()
    os.remove_file(self.fantasia)
    os.remove_file(self.nsr)
    os.remove_file(self.ltaf)

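# NOTE (assumption): _Concatter.run above relies on a _write_data helper that is not
# shown in this section, so its exact behaviour is unknown. Judging from the call
# sites, it copies one block of lines from `reader` into `res_file`, optionally
# discarding lines in between (used to thin out the much larger LTAF set), and
# returns the last line it read so that "" signals end-of-file to the loops above.
# A hypothetical sketch under those assumptions (written as a plain function; in the
# listing it is a method, with `self` as the first parameter):
def _write_data(res_file, reader, counter, concat_rate, skip, read_rate=0):
    line = ""
    # copy roughly counter / concat_rate lines per call, so each source is consumed
    # over about concat_rate interleaved blocks
    for _ in range(max(counter // concat_rate, 1)):
        line = reader.readline()
        if line == "":
            break
        res_file.write(line)
        if skip:
            # pass over read_rate readings after every kept reading
            for _ in range(read_rate):
                reader.readline()
    return line
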
def _training_loc(self, res_loc):
    path_split = res_loc.split(os.get_path_seperator())
    training_path = f"{path_split[0]}{os.get_path_seperator()}{path_split[1]}"
    for x in range(2, len(path_split) - 1):
        training_path = os.path_join(training_path, path_split[x])
    training_path = os.path_join(training_path, "training")
    os.makedirs(training_path, True)
    return os.path_join(training_path, f"{path_split[len(path_split) - 1]}.csv")

def _load_data(self, path, log):
    data_sets = []
    res_name = os.get_ressource_path_name(path)
    data = self._load(path)
    percent = 0
    index = 0
    for index, data_set in enumerate(data):
        index += 1  # 1-based count for the progress log
        if len(data_set) > 0:
            self._prep_data_set(data_set, data_sets)
        if log:
            percent, _ = self._log_process(len(data), index, res_name, percent)
    return np.array(data_sets)

def exec(self, task_input, task_output):
    ressources, training_data, backup = self._setup(task_output)
    with open(training_data, 'w') as training:
        if backup is not None and os.is_path_file(backup):
            self._merge_training(training, backup)
        else:
            headders = self._get_headders(task_output["readings"])
            training.write(headders)
        for split_folder in ressources:
            split_dir = os.path_join(task_output["res_loc"], split_folder)
            if os.is_path_file(split_dir) and not os.is_dir(split_dir):
                continue
            self.append_training_set(training, split_dir, os.dir_res_list(split_dir))
            os.remove_dir(split_dir)

def _bakup_saved(self, training_loc):
    if not os.is_path_file(training_loc):
        return
    return os.copy_file(training_loc, "temp.bak")

def exec(self, task_input, task_output):
    sampling_frequency = task_input["sampled_frequency"]
    target_frequency = task_input["target_frequency"]
    # Number of original readings to copy before a reading is duplicated, in order
    # to raise the effective sampling frequency towards the target frequency.
    doubling_rate = math.ceil(sampling_frequency /
                              (((target_frequency / sampling_frequency) - 1) * sampling_frequency))
    res_elems = os.dir_res_list(task_output["res_loc"])
    for res_elem in res_elems:
        origin_path = os.path_join(task_output["res_loc"], res_elem)
        if not os.is_path_file(origin_path):
            continue
        backup_path = os.copy_file(origin_path, f"{task_input['name']}_bakup.bak")
        with open(origin_path, 'w') as file:
            with open(backup_path) as backup_file:
                temp_reading = backup_file.readline()
                sampling_nr = 1
                while temp_reading != "":
                    temp_reading = backup_file.readline()
                    if temp_reading != "":
                        splitted = temp_reading.split(',')
                        file.write(f"{sampling_nr},{splitted[1]},{splitted[2]}")
                        if "f2" in res_elem:
                            file.write("\n")
                        sampling_nr += 1
                    for _ in range(1, doubling_rate):
                        temp_reading = backup_file.readline()
                        if temp_reading != "":
                            splitted = temp_reading.split(',')
                            file.write(f"{sampling_nr},{splitted[1]},{splitted[2]}")
                            if "f2" in res_elem:
                                file.write("\n")
                            sampling_nr += 1
                    if temp_reading != "":
                        # Write the last reading of the block again to pad the
                        # signal up towards the target frequency.
                        splitted = temp_reading.split(',')
                        file.write(f"{sampling_nr},{splitted[1]},{splitted[2]}")
                        if "f2" in res_elem:
                            file.write("\n")
                        sampling_nr += 1
                        splitted = temp_reading.split(',')
                        file.write(f"{sampling_nr},{splitted[1]},{splitted[2]}")
                        if "f2" in res_elem:
                            file.write("\n")
                        sampling_nr += 1
                        # Read ahead so the loop condition can detect end of file.
                        temp_reading = backup_file.readline()
        os.remove_file(backup_path)

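# NOTE (assumption): the doubling_rate expression above reduces to
# ceil(sampling_frequency / (target_frequency - sampling_frequency)), i.e. one
# duplication step is performed after every doubling_rate original readings. With
# hypothetical values of a 250 Hz source and a 300 Hz target:
import math

sampling_frequency = 250  # hypothetical source rate
target_frequency = 300    # hypothetical target rate
doubling_rate = math.ceil(sampling_frequency /
                          (((target_frequency / sampling_frequency) - 1) * sampling_frequency))
assert doubling_rate == 5  # a duplication step after every 5th original reading
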
def _prep_split_folder(self):
    os.makedirs(self.res_path.split('.')[0], True)