def verify_working_directory(directory):
    """Check that *directory* only contains what an initial data folder should.

    Every entry returned by ``prsg.ordering_files(directory)`` is inspected:

    * a sub-directory is always tolerated; a warning is printed unless its
      name equals ``CSV_FOLDER_NAME`` or ``SPLITTED_FOLDER_NAME`` with the
      last character dropped (presumably a trailing path separator -- confirm
      against the constants' definitions);
    * a regular file must end in ``.txt`` or ``.wav``;
    * anything that is neither a directory nor a regular file is rejected.

    Args:
        directory: path of the folder to check.

    Returns:
        1 when the folder looks valid, 0 when it is empty or contains an
        unexpected entry (an explanatory message is printed in that case).
    """
    train_files = prsg.ordering_files(directory)
    if len(train_files) == 0:
        print("ERROR : found no file nor directory")
        print("This may be the wrong folder")
        return 0

    # Folders that earlier runs of the pipeline are allowed to have created.
    known_folders = (CSV_FOLDER_NAME[:-1], SPLITTED_FOLDER_NAME[:-1])

    for filename in train_files:
        # join() also works when `directory` has no trailing separator.
        path = os.path.join(directory, filename)

        if os.path.isdir(path):
            if filename not in known_folders:
                # Only a warning: an unknown folder is suspicious, not fatal.
                print("WARNING : found an non initial folder")
                print("This could be the wrong folder, be carefull !")
            continue

        if os.path.isfile(path):
            # Require the dot so that e.g. "foowav" is not mistaken for audio.
            if filename.endswith(('.txt', '.wav')):
                continue
            print('ERROR : found a non \'txt\' nor \'wav\' file')
            print("This should be the wrong folder")
            return 0

        # Neither a directory nor a regular file (e.g. a dangling link).
        print("ERROR : found no file nor directory")
        print("This is the wrong folder")
        return 0

    return 1
def split_record_in_cycle(dir, file_csv, output_dir):
    """Cut every annotated record of *dir* into one WAV file per cycle.

    The CSV file is read entirely; column 0 of a row is the record name
    (the WAV file name without its 4-character extension) and columns 1 and 2
    are the cycle start and end. Both are multiplied by 1000 before slicing
    the audio, and pydub slices in milliseconds, so they are taken as seconds.

    Rows are consumed sequentially: the function assumes the CSV rows are
    grouped by record and follow the same order as ``prsg.ordering_files(dir)``.
    A WAV file whose name does not match the current row is left untouched.

    Args:
        dir: folder holding the source ``.wav`` files; concatenated directly
            with file names, so it must end with a path separator.
        file_csv: path of the CSV file describing the cycles.
        output_dir: folder receiving ``<record>_<NN>.wav`` files; also
            concatenated directly with the file name.

    Returns:
        The number of CSV rows that were consumed.
    """
    pbar = ProgressBar()
    with open(file_csv, newline='') as csvfile:
        data = list(csv.reader(csvfile))

    # Stop at the reported line count as before, but never index past the
    # rows that were actually loaded.
    limit = min(prsg.nb_lines(file_csv), len(data))

    i = 0
    for filename in pbar(prsg.ordering_files(dir)):
        if not filename.endswith('.wav'):
            continue

        save_file_name = filename[:-4]
        print()

        # No (more) rows for this record: skip it instead of reading data[i]
        # beyond the end of the CSV.
        if i >= limit or data[i][0] != save_file_name:
            continue

        # Decode the record once; every cycle below is a slice of it.
        myaudio = pydub.AudioSegment.from_wav(dir + save_file_name + ".wav")

        cpt = 1
        while i < limit and data[i][0] == save_file_name:
            print("Processed record = " + data[i][0] + " nb cycle = " + str(cpt))
            start_ms = int(float(data[i][1]) * 1000)
            end_ms = int(float(data[i][2]) * 1000)
            saved_file = (output_dir + save_file_name + "_"
                          + "{0:0=2d}".format(cpt) + ".wav")
            myaudio[start_ms:end_ms].export(saved_file, format="wav")
            i += 1
            cpt += 1

    return i
input_train_dir = arguments[1] + TRAIN_FOLDER input_test_dir = arguments[1] + TEST_FOLDER out_train_csv = open(arguments[1] + 'ft_train.csv', 'w') out_test_csv = open(arguments[1] + 'ft_test.csv', 'w') ######### # TRAIN # ######### ### STEP 3 : Compute features representing audio ################################################ # features_train_dir,status = prsg.verify_folder(input_train_dir,FEATURES_FOLDER_NAME) # Verify if folder already exists and if it is empty or not list_features = prsg.ordering_files(input_train_dir) # if(status == 0): # print("ERROR : Can not find nor create the asked folder") # sys.exit() # elif(status == 1): print("TRAIN") for f in pbar(list_features): filename = f[:-4] ft_path = input_train_dir + filename print(input_train_dir + f) if (os.path.isfile(input_train_dir + f)): # print(f) ft = essentia_lowlevel_features_computation(input_train_dir, f) # print(ft) out_train_csv.write(filename) for val in ft:
# Path of the CSV file inside the CSV folder prepared by the previous step.
csv_train_file = csv_train_dir + CSV_FILE_NAME

# Locate (or create) the folder receiving the split cycles.
# status == 0 is treated as a failure and status == 1 triggers the work;
# any other value skips it (presumably the folder is already populated --
# confirm against prsg.verify_folder).
cycles_train_dir, status = prsg.verify_folder(
    input_train_dir, SPLITTED_FOLDER_NAME
)
if status == 0:
    print("ERROR : Can not find nor create the asked folder")
    sys.exit()
elif status == 1:
    split_record_in_cycle(input_train_dir, csv_train_file, cycles_train_dir)

### STEP 3 : Compute features representing audio
################################################
# Same status convention as above, this time for the features folder.
features_train_dir, status = prsg.verify_folder(
    input_train_dir, FEATURES_FOLDER_NAME
)
list_features = prsg.ordering_files(cycles_train_dir)
if status == 0:
    print("ERROR : Can not find nor create the asked folder")
    sys.exit()
elif status == 1:
    for f in list_features:
        # One pickle per cycle, named after the cycle file minus its
        # 4-character extension.
        ft_path = features_train_dir + f[:-4]
        ft = essentia_lowlevel_features_computation(cycles_train_dir, f)
        # Close the file deterministically instead of leaking the handle.
        with open(ft_path, 'wb') as ft_file:
            pickle.dump(ft, ft_file)

########
# TEST #
########

### STEP 1 : Preparing CSV file
################################################
def parsing_data_to_csv(path_to_data_folder, path_to_diagnostic_file,
                        path_to_csv_folder, csv_filename):
    """Write one CSV row per annotation line of the 'txt' files of a folder.

    For every file of ``path_to_data_folder`` whose name ends with ``txt``,
    each line is split on tabs into start time, end time, crackle and wheeze,
    and written as ``file_id,start,end,pathology,crackle,wheeze``.
    ``file_id`` is the file name without its last 4 characters; its first
    ``_``-separated field is the patient number used to look up the pathology
    code in the diagnostic file.

    Args:
        path_to_data_folder: folder holding the annotation files; it is
            concatenated directly with file names, so it must end with a
            path separator.
        path_to_diagnostic_file: text file with one ``patient<TAB>pathology``
            entry per line.
        path_to_csv_folder: folder of the output file; concatenated directly
            with ``csv_filename``.
        csv_filename: name of the CSV file to write.

    Raises:
        ValueError: when a file id does not have exactly five ``_``-separated
            fields, or a line does not have the expected number of
            tab-separated fields (tuple unpacking fails).
    """
    input_data_dir = path_to_data_folder
    input_info = path_to_diagnostic_file
    output_dir = path_to_csv_folder
    # os.makedirs(output_dir, exist_ok=True)
    output = output_dir + csv_filename
    # NOTE(review): out_file is not closed anywhere in the visible code.
    out_file = open(output, "w")
    cpt = 0
    # nb_files = (len(os.listdir(input_data_dir)))/2
    # NOTE(review): cpt and nb_files are not read in the visible code;
    # they look like progress counters -- confirm where they are used.
    nb_files = prsg.nb_files(input_data_dir) / 2
    with open(input_info) as fp:
        diagnostics = fp.read().splitlines()
    # ordered_files = sorted(os.listdir(input_data_dir))
    ordered_files = prsg.ordering_files(input_data_dir)
    for filename in ordered_files:
        if (filename.endswith('txt')):
            cpt += 1
            input = input_data_dir + filename
            # NOTE(review): input_file is not closed in the visible code.
            input_file = open(input, 'r')
            content = input_file.readline()
            file_id = filename[:-4]
            # Only patient_number is used below; the other fields are unpacked
            # to enforce the five-part naming scheme.
            patient_number, record_index, body_area, channel, record_tool = file_id.split(
                "_")
            # Default code when the patient is not found; it is the same value
            # as the "Asthma" code below.
            pathology = 0
            # NOTE(review): the range stops at len(diagnostics) - 2, so the
            # last diagnostic line is never examined -- confirm this is wanted.
            for i in range(0, len(diagnostics) - 1):
                tmp_patient, tmp_pathology = diagnostics[i].split("\t")
                if patient_number == tmp_patient:
                    # Map the diagnosis label to its integer code.
                    if (tmp_pathology == "Asthma"):
                        pathology = 0
                    if (tmp_pathology == "LRTI"):
                        pathology = 1
                    if (tmp_pathology == "Pneumonia"):
                        pathology = 2
                    if (tmp_pathology == "Bronchiectasis"):
                        pathology = 3
                    if (tmp_pathology == "Bronchiolitis"):
                        pathology = 4
                    if (tmp_pathology == "URTI"):
                        pathology = 5
                    if (tmp_pathology == "COPD"):
                        pathology = 6
                    if (tmp_pathology == "Healthy"):
                        pathology = 7
            # readline() returns '' at end of file, which ends the loop.
            while content:
                # The line is not stripped, so the last field keeps whatever
                # line ending readline() returned; no separate newline is
                # written after the row.
                start_time, end_time, crackle, wheeze = content.split('\t')
                # out_file.write(patient_number+","+record_index+","+body_area+","+channel+","+record_tool+","+start_time+","+end_time+","+str(pathology)+","+crackle+","+wheeze)
                out_file.write(file_id + "," + start_time + "," + end_time +
                               "," + str(pathology) + "," + crackle + "," +
                               wheeze)
                content = input_file.readline()