Beispiel #1
0
def verify_working_directory(directory):
    """Check that ``directory`` only holds the expected dataset entries.

    Every name returned by ``prsg.ordering_files(directory)`` is inspected:

    * sub-folders are tolerated; a warning is printed unless the name equals
      ``CSV_FOLDER_NAME`` or ``SPLITTED_FOLDER_NAME`` minus their last
      character,
    * regular files must end with ``.txt`` or ``.wav``,
    * anything that is neither a file nor a folder is rejected.

    ``directory`` is concatenated directly with each entry name, so it is
    expected to already end with a path separator.

    Args:
        directory: path of the folder to check.

    Returns:
        1 when the folder passes every check, 0 otherwise (a diagnostic is
        printed before returning 0).
    """
    train_files = prsg.ordering_files(directory)
    if len(train_files) == 0:
        print("ERROR : found no file nor directory")
        print("This may be the wrong folder")
        return 0

    for filename in train_files:
        path = directory + filename
        if os.path.isdir(path):
            # The constants are compared without their last character
            # (presumably a trailing path separator -- confirm).
            if filename not in (CSV_FOLDER_NAME[:-1],
                                SPLITTED_FOLDER_NAME[:-1]):
                # Unknown folders are only reported, never fatal.
                print("WARNING : found an non initial folder")
                print("This could be the wrong folder, be carefull !")
        elif os.path.isfile(path):
            # Match real extensions, dot included: a bare 'wav' suffix would
            # also accept names that merely end in the letters "wav".
            if not filename.endswith(('.txt', '.wav')):
                print("ERROR : found a non 'txt' nor 'wav' file")
                print("This should be the wrong folder")
                return 0
        else:
            print("ERROR : found no file nor directory")
            print("This is the wrong folder")
            return 0
    return 1
Beispiel #2
0
def split_record_in_cycle(dir,file_csv,output_dir) :
    """Export one WAV file per annotated cycle of every record in ``dir``.

    ``file_csv`` is read with ``csv.reader``. For each row, column 0 is
    compared with the record name (WAV file name without its last four
    characters) and columns 1 and 2 are parsed as floats, multiplied by 1000
    and used as slice bounds on the decoded audio (pydub slices are expressed
    in milliseconds).

    A single forward cursor walks the CSV rows, so the rows must be grouped
    by record and appear in the same order as ``prsg.ordering_files(dir)``
    lists the WAV files.

    Args:
        dir: folder holding the ``.wav`` records; concatenated directly with
            file names, so it must end with a path separator.
        file_csv: path of the CSV file describing the cycles.
        output_dir: folder receiving ``<record>_<NN>.wav`` files; also
            concatenated directly with the file name.

    Returns:
        The index of the first CSV row that was not consumed, i.e. the number
        of cycles written.
    """
    pbar = ProgressBar()
    lines = prsg.nb_lines(file_csv)
    with open(file_csv, newline='') as csvfile:
        data = list(csv.reader(csvfile))
    total_rows = len(data)

    i = 0
    for filename in pbar(prsg.ordering_files(dir)):
        if not filename.endswith('.wav'):
            continue

        save_file_name = filename[:-4]
        print()

        # Decoded lazily and only once per record: every cycle of a record is
        # cut from the same audio, and records without rows are never loaded.
        myaudio = None
        cpt = 1
        # The bounds check stops the cursor from running past the last CSV
        # row when WAV files remain after all rows have been consumed.
        while i < total_rows and data[i][0] == save_file_name:
            print("Processed record = "+data[i][0]+" nb cycle = "+str(cpt))
            if myaudio is None:
                myaudio = pydub.AudioSegment.from_wav(dir+save_file_name+".wav")
            start_ms = int(float(data[i][1])*1000)
            end_ms = int(float(data[i][2])*1000)
            chunk_data = myaudio[start_ms:end_ms]
            saved_file = (output_dir+save_file_name+"_"+"{0:0=2d}".format(cpt)+".wav")
            chunk_data.export(saved_file, format="wav")
            i += 1
            cpt += 1
            if i == lines:
                break
    return i
Beispiel #3
0
# Input folders and output CSV files are all located under the base path
# given as arguments[1] (joined by plain string concatenation).
input_train_dir = arguments[1] + TRAIN_FOLDER

input_test_dir = arguments[1] + TEST_FOLDER

# NOTE(review): both handles are opened without a context manager and no
# close() is visible in this part of the script -- confirm they are closed
# further down.
out_train_csv = open(arguments[1] + 'ft_train.csv', 'w')
out_test_csv = open(arguments[1] + 'ft_test.csv', 'w')

#########
# TRAIN #
#########

### STEP 3 : Compute features representing audio
################################################
# features_train_dir,status = prsg.verify_folder(input_train_dir,FEATURES_FOLDER_NAME) # Verify if folder already exists and if it is empty or not
list_features = prsg.ordering_files(input_train_dir)
# if(status == 0):
# print("ERROR : Can not find nor create the asked folder")
# sys.exit()
# elif(status == 1):
print("TRAIN")
for f in pbar(list_features):
    # Drop the last four characters of the entry name (presumably the
    # '.wav' extension -- confirm against the folder content).
    filename = f[:-4]
    # NOTE(review): ft_path is not used in the visible part of this loop.
    ft_path = input_train_dir + filename
    print(input_train_dir + f)
    # Only regular files are processed; other entries are ignored.
    if (os.path.isfile(input_train_dir + f)):
        # print(f)
        ft = essentia_lowlevel_features_computation(input_train_dir, f)
        # print(ft)
        # Each output row starts with the record name; the feature values
        # are handled by the loop below.
        out_train_csv.write(filename)
        for val in ft:
# Path of the CSV file, built from the folder defined in the previous step.
csv_train_file = csv_train_dir + CSV_FILE_NAME

# Verify if the folder for the split cycles already exists and if it is
# empty or not. Status 0 aborts the script, status 1 triggers the split; any
# other status leaves the folder as it is.
cycles_train_dir, status = prsg.verify_folder(input_train_dir,
                                              SPLITTED_FOLDER_NAME)
if status == 0:
    print("ERROR : Can not find nor create the asked folder")
    sys.exit()
elif status == 1:
    split_record_in_cycle(input_train_dir, csv_train_file, cycles_train_dir)

### STEP 3 : Compute features representing audio
################################################
# Same folder check as above, this time for the features folder.
features_train_dir, status = prsg.verify_folder(input_train_dir,
                                                FEATURES_FOLDER_NAME)
list_features = prsg.ordering_files(cycles_train_dir)
if status == 0:
    print("ERROR : Can not find nor create the asked folder")
    sys.exit()
elif status == 1:
    for f in list_features:
        # One pickle per cycle, named after the cycle file minus its last
        # four characters.
        ft_path = features_train_dir + f[:-4]
        ft = essentia_lowlevel_features_computation(cycles_train_dir, f)
        # Close each handle as soon as the dump is done instead of leaving
        # one open file per feature to the garbage collector.
        with open(ft_path, 'wb') as ft_file:
            pickle.dump(ft, ft_file)

########
# TEST #
########

### STEP 1 : Preparing CSV file
################################################
Beispiel #5
0
def parsing_data_to_csv(path_to_data_folder, path_to_diagnostic_file,
                        path_to_csv_folder, csv_filename):
    input_data_dir = path_to_data_folder
    input_info = path_to_diagnostic_file
    output_dir = path_to_csv_folder
    # os.makedirs(output_dir, exist_ok=True)
    output = output_dir + csv_filename
    out_file = open(output, "w")

    cpt = 0
    # nb_files = (len(os.listdir(input_data_dir)))/2
    nb_files = prsg.nb_files(input_data_dir) / 2

    with open(input_info) as fp:
        diagnostics = fp.read().splitlines()

    # ordered_files = sorted(os.listdir(input_data_dir))
    ordered_files = prsg.ordering_files(input_data_dir)

    for filename in ordered_files:
        if (filename.endswith('txt')):
            cpt += 1
            input = input_data_dir + filename
            input_file = open(input, 'r')

            content = input_file.readline()
            file_id = filename[:-4]

            patient_number, record_index, body_area, channel, record_tool = file_id.split(
                "_")
            pathology = 0

            for i in range(0, len(diagnostics) - 1):
                tmp_patient, tmp_pathology = diagnostics[i].split("\t")

                if patient_number == tmp_patient:
                    if (tmp_pathology == "Asthma"):
                        pathology = 0
                    if (tmp_pathology == "LRTI"):
                        pathology = 1
                    if (tmp_pathology == "Pneumonia"):
                        pathology = 2
                    if (tmp_pathology == "Bronchiectasis"):
                        pathology = 3
                    if (tmp_pathology == "Bronchiolitis"):
                        pathology = 4
                    if (tmp_pathology == "URTI"):
                        pathology = 5
                    if (tmp_pathology == "COPD"):
                        pathology = 6
                    if (tmp_pathology == "Healthy"):
                        pathology = 7

            while content:
                start_time, end_time, crackle, wheeze = content.split('\t')

                # out_file.write(patient_number+","+record_index+","+body_area+","+channel+","+record_tool+","+start_time+","+end_time+","+str(pathology)+","+crackle+","+wheeze)
                out_file.write(file_id + "," + start_time + "," + end_time +
                               "," + str(pathology) + "," + crackle + "," +
                               wheeze)
                content = input_file.readline()