def createThread(threadIndex, queryString, pcdbFileName, outputDir):
    """Create the downloader thread that handles one division of the search.

    The time bounds for the thread are whatever ``helper.interpolate``
    returns for positions ``threadIndex`` and ``threadIndex + 1`` out of
    ``config.numSearchDivisions`` between ``config.startTime`` and
    ``config.endTime``.  The thread is also given ``threadIndex`` rendered as
    a zero-padded decimal string ``config.numDivisionDigits`` characters wide.

    :param threadIndex: index of the search division this thread covers
    :param queryString: passed through to ``FlickrDownloaderThread``
    :param pcdbFileName: passed through to ``FlickrDownloaderThread``
    :param outputDir: passed through to ``FlickrDownloaderThread``
    :return: a new ``FlickrDownloaderThread`` instance
    """
    # Build a printf-style spec such as '%04d', then apply it to the index.
    padSpec = '%0{}d'.format(config.numDivisionDigits)
    indexLabel = padSpec % threadIndex

    lowerBound, upperBound = (
        helper.interpolate(
            position,
            config.numSearchDivisions,
            config.startTime,
            config.endTime,
        )
        for position in (threadIndex, threadIndex + 1)
    )

    return FlickrDownloaderThread(
        queryString,
        pcdbFileName,
        lowerBound,
        upperBound,
        outputDir,
        indexLabel,
    )
def data_confidence(patients, patient, code):
    """Decide whether ``patient`` has enough ``code`` records to be analysed.

    For each of the first 70 entries of ``patients`` the rows whose ``CODE``
    column equals ``code`` are counted; the ceiling of the mean of those
    counts is the reference length.  The patient's own matching-row count is
    then compared with it:

    * at or below 60% of the reference length -> not analysable;
    * above 60% but below the reference length -> analysable; the patient's
      ``VALUE`` column is passed to ``helper.interpolate`` together with the
      reference length and a short report is printed;
    * at or above the reference length -> analysable.

    A count exactly equal to the reference length used to fall through every
    branch and was reported as not analysable; it is now treated like any
    other count that reaches the reference length.

    :param patients: sliceable sequence of objects whose ``data`` attribute is
        a table with a ``CODE`` column (indexed through ``.loc``, so
        presumably a pandas DataFrame -- confirm against the caller)
    :param patient: object with the same kind of ``data`` table (a ``VALUE``
        column is read in the interpolation branch) and a ``getPatientId()``
        method
    :param code: value compared against the ``CODE`` column
    :return: ``True`` if the patient is analysable, otherwise ``False``;
        ``False`` as well when ``patients`` is empty, because no reference
        length can be computed
    """
    cohort = patients[:70]
    if len(cohort) == 0:
        # No reference cohort: nothing to compare the patient against.
        return False

    # Number of matching records for every cohort member.
    counts = [
        len(member.data.loc[member.data['CODE'] == code])
        for member in cohort
    ]
    avg_len = math.ceil(sum(counts) / len(counts))

    # The filter does not depend on any loop variable, so evaluate it once.
    patient_df = patient.data.loc[patient.data['CODE'] == code]
    pat_len = len(patient_df)

    # Confidence model (comparing all patients to THE patient).
    threshold = avg_len * 0.6
    if pat_len <= threshold:
        return False

    if pat_len < avg_len:
        pat_val_arr = patient_df[['VALUE']]
        pat_val_arr_expanded = helper.interpolate(pat_val_arr, avg_len)
        print("-----------------")
        print("| Patient #", patient.getPatientId(), " |")
        print("|", len(pat_val_arr_expanded), " |")
        print("|", len(pat_val_arr), " |")
        print("-----------------")

    return True
def get_mfcc(signal1, signal2, count, label, uuid, subject):
    """
    Convert a two-channel recording to MFCCs and save every variant as HDF5.

    The raw pair is always processed.  When the recording belongs to the
    training split, augmented copies are processed as well: one per
    (noise, coefficient) combination, plus shifted copies when ``apply_shift``
    is set and stretched copies when ``apply_stretch`` is set.  Each variant
    is converted with ``mfcc``, optionally resized (``apply_interpolation`` /
    ``apply_padding``), merged into a single array and written to::

        ../data/<des_string>/<subject>/<label>/[test/|validation/]<uuid>_<count>_<i>.hdf5

    where ``i`` is the variant's position (0 is the unaugmented recording).
    Each file holds two datasets: ``label`` (index of ``label`` in the
    module-level ``labels``) and ``signal``.

    The function relies on module-level settings and helpers (``show_plot``,
    ``test_fraction``, ``noises``, ``noise_coefficients``, ``new_length``,
    ``add_noise``, ``stretch``, ``interpolate``, ``pad``, ``normalize``, ...)
    and appends the MFCC length of the unaugmented recording to the
    module-level ``lengths`` list.

    :param signal1: samples of channel 1 (must support ``len``, division by a
        scalar and ``np.roll``; presumably a 1-D numpy array)
    :param signal2: samples of channel 2, same requirements as ``signal1``
    :param count: running number of the recording; ``count % test_fraction``
        equal to 1 selects the test split, equal to 2 the validation split,
        anything else the training split.  Also part of the file name.
    :param label: class label; must be an element of ``labels`` and is used
        as a directory name
    :param uuid: identifier used as the file-name prefix
    :param subject: subject name, used as a directory name
    :return: None
    """
    destination = "../data/{}/".format(des_string)
    sub_destination = destination + subject + "/"
    label_dir = sub_destination + label

    if show_plot:
        line1, = mp.plot(signal1 / 1000., label="Channel 1")
        mp.plot(signal2 / 1000., label="Channel 2")
        mp.rcParams.update({'font.size': 20})
        mp.locator_params(axis='y', nticks=3)
        mp.yticks()
        mp.xlabel("Samples (4000 per second)")
        mp.legend(handler_map={line1: HandlerLine2D(numpoints=4)})
        mp.ylabel("Milli volts")
        mp.show()

    # One in every n recordings is test data and one in every n is validation
    # data; the other (n - 2) are training data.
    split_index = count % test_fraction
    test_data = split_index == 1
    validation_data = split_index == 2
    if test_data:
        split_dir = "test/"
    elif validation_data:
        split_dir = "validation/"
    else:
        split_dir = ""

    m1_array = [signal1]
    m2_array = [signal2]

    if not (test_data or validation_data):
        # Data augmentation is only done for the training dataset, never for
        # testing or validation.
        for noise in noises:
            for coefficient in noise_coefficients:
                # Add noise to the signal
                scale = float(coefficient)
                m1_array.append(add_noise(signal1, scale, noise))
                m2_array.append(add_noise(signal2, scale, noise))

        if apply_shift:
            # Roll each channel by 5% of its length, first right, then left.
            for direction in (1, -1):
                m1_array.append(
                    np.roll(signal1, int(len(signal1) * 0.05 * direction)))
                m2_array.append(
                    np.roll(signal2, int(len(signal2) * 0.05 * direction)))

        if apply_stretch:
            # Stretch the signal
            for factor in (0.8, 1.2):
                m1_array.append(stretch(signal1, factor))
                m2_array.append(stretch(signal2, factor))

    # Convert the original and augmented data to their MFCC representations.
    for i, (channel1, channel2) in enumerate(zip(m1_array, m2_array)):
        if len(channel1) < 1:
            continue

        m1 = mfcc(channel1, samplerate=sampling_frequency, numcep=numcep,
                  nfilt=nfilt, highfreq=highfreq)
        m2 = mfcc(channel2, samplerate=sampling_frequency, numcep=numcep,
                  nfilt=nfilt, highfreq=highfreq)

        if i == 0:
            # Only the unaugmented recording contributes to the statistics.
            lengths.append(len(m1))
        if show_plot:
            print(len(m1), len(m1[0]))

        if apply_interpolation:
            # Use interpolation to make the converted data have the same sizes
            m1 = interpolate(m1, new_length)
            m2 = interpolate(m2, new_length)
        if apply_padding:
            # Use padding to make the converted data have the same sizes
            m1 = pad(m1, new_length, True)
            m2 = pad(m2, new_length, True)

        if apply_stack:
            # Stack the signals to make them comparable to multi-channel
            # inputs to the models. Helpful for CNNs.
            signal = np.dstack((m1, m2))
        else:
            m1 = m1.T
            m2 = m2.T
            signal = np.concatenate((m1, m2)).T
            # NOTE(review): the return value is discarded.  If `normalize`
            # is sklearn.preprocessing.normalize (copy=True by default) this
            # call leaves `signal` untouched -- confirm whether the stored
            # data is meant to be normalised before changing it.
            # NOTE(review): the source layout was flattened; this call is
            # assumed to belong to the non-stacked branch -- confirm.
            normalize(signal, axis=0)

        if show_plot and i == 1:
            # Show converted MFCC representations (first augmented variant).
            fig, ax = mp.subplots()
            mfcc_data = np.swapaxes(m1, 0, 1)
            ax.imshow(mfcc_data, interpolation='nearest', origin='lower')
            ax.set_title('MFCC Representation of Channel 1')
            mp.xlabel("Time Frames")
            mp.ylabel("Mel Bands")
            mp.show()

            fig, ax = mp.subplots()
            mfcc_data = np.swapaxes(m2, 0, 1)
            ax.imshow(mfcc_data, interpolation='nearest', origin='lower')
            ax.set_title('MFCC Representation of Channel 2')
            mp.xlabel("Time Frames")
            mp.ylabel("Mel Bands")
            mp.show()

        # NOTE: rounding the converted signal to 4 decimal places
        # (np.round(signal, decimals=4)) was deliberately disabled here.

        # Save the files
        data = {"label": labels.index(label), "signal": signal}

        # Both split directories are always created, whichever split this
        # recording belongs to; exist_ok avoids the check-then-create race.
        os.makedirs(label_dir + "/test", exist_ok=True)
        os.makedirs(label_dir + "/validation", exist_ok=True)

        file_location = "{}/{}{}_{}_{}.hdf5".format(
            label_dir, split_dir, uuid, count, i)

        with h5py.File(file_location, 'w') as f:
            for k, v in data.items():
                f.create_dataset(k, data=np.array(v, dtype=float))
def splitQuery(self, numImages, minTime, maxTime, splitAttempts):
    """Replace one query by several queries over consecutive sub-ranges.

    The number of sub-ranges is ``numImages`` divided by
    ``config.targetPhotosPerPage``, rounded up.  The bounds of sub-range
    ``k`` are the values ``helper.interpolate`` returns for positions ``k``
    and ``k + 1`` between ``minTime`` and ``maxTime``.  Each sub-range is
    handed to ``self.queryTimeRange`` with the attempt counter raised by one.

    :param numImages: number of images reported for the original range
    :param minTime: lower bound of the range being split
    :param maxTime: upper bound of the range being split
    :param splitAttempts: how many splits led to this call
    """
    chunkCount = math.ceil(numImages / config.targetPhotosPerPage)

    for index in range(chunkCount):
        lower, upper = (
            helper.interpolate(position, chunkCount, minTime, maxTime)
            for position in (index, index + 1)
        )
        self.queryTimeRange(lower, upper, splitAttempts + 1)