Code Example #1
File: flickr_downloader.py  Project: Asperado/iconic
def createThread(threadIndex, queryString, pcdbFileName, outputDir):
    # Each thread handles one slice of the [startTime, endTime] search window;
    # paddedNum is the zero-padded index of that slice.
    paddedNum = ('%0' + str(config.numDivisionDigits) + 'd') % threadIndex
    minTime = helper.interpolate(threadIndex, config.numSearchDivisions,
                                 config.startTime, config.endTime)
    maxTime = helper.interpolate(threadIndex + 1, config.numSearchDivisions,
                                 config.startTime, config.endTime)
    return FlickrDownloaderThread(queryString, pcdbFileName,
                                  minTime, maxTime,
                                  outputDir, paddedNum)
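Both this example and Code Example #4 call helper.interpolate(index, divisions, start, end) to carve a time range into equal slices. The helper itself is not shown on this page; the sketch below is only an assumption of how such a linear-interpolation helper could look, not the project's actual code.

def interpolate(index, divisions, start, end):
    # Hypothetical sketch: map index in [0, divisions] linearly onto [start, end].
    return start + (end - start) * (float(index) / divisions)

Under this assumption, thread 0 starts at startTime, and each thread's maxTime equals the next thread's minTime, so the slices tile the whole search window without gaps.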
Code Example #2
File: confidence.py  Project: kareemarab/diabetes
import math

import numpy as np

import helper  # project-local module providing interpolate()


def data_confidence(patients, patient, code):

    # Attributes
    arr_ = []
    isAnalyzable = False

    # Average number of records with this CODE across the first 70 patients
    for i in patients[:70]:
        df_ = i.data.loc[i.data['CODE'] == code]
        arr_.append(len(df_))
    avg_len = math.ceil(np.sum(arr_) / len(arr_))

    # Records with the same CODE for the patient under consideration
    patient_df = patient.data.loc[patient.data['CODE'] == code]
    pat_len = len(patient_df)
    pat_val_arr = patient_df[['VALUE']]

    # Confidence model: compare this patient's record count to the population average
    threshold = avg_len * 0.6
    if pat_len < threshold:
        # Too few records for a reliable comparison
        pass
    elif threshold < pat_len < avg_len:
        # Slightly short series: expand the values up to the average length
        pat_val_arr_expanded = helper.interpolate(pat_val_arr, avg_len)
        print("-----------------")
        print("| Patient #", patient.getPatientId(), " |")
        print("|", len(pat_val_arr_expanded), "           |")
        print("|", len(patient_df[['VALUE']]), "           |")
        print("-----------------")
        isAnalyzable = True
    elif pat_len > avg_len:
        isAnalyzable = True

    return isAnalyzable
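Note that this project calls helper.interpolate with a different signature than the time-slicing examples: it receives a column of VALUE readings and a target length, and expands a short series to the population's average length. A minimal sketch of one way such a resampling helper could work with NumPy (hypothetical; the actual kareemarab/diabetes helper is not shown on this page):

import numpy as np

def interpolate(values, target_length):
    # Hypothetical sketch: linearly resample a 1-D series to target_length points.
    values = np.asarray(values, dtype=float).ravel()
    old_x = np.linspace(0.0, 1.0, num=len(values))
    new_x = np.linspace(0.0, 1.0, num=int(target_length))
    return np.interp(new_x, old_x, values)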
Code Example #3
def get_mfcc(signal1, signal2, count, label, uuid, subject):
    """
    Convert the two input signals to their MFCC representations, augment the
    training copies, and save the results to the destination directory in HDF5 format.
    :param signal1: raw samples for channel 1
    :param signal2: raw samples for channel 2
    :param count: running sample index, used to route the sample to train/test/validation
    :param label: class label; its index in the labels list is stored with the signal
    :param uuid: unique identifier embedded in the output file name
    :param subject: subject name, used as a sub-directory of the destination
    :return: None
    """
    destination = "../data/{}/".format(des_string)
    if show_plot:
        line1, = mp.plot(signal1 / 1000., label="Channel 1")
        _, = mp.plot(signal2 / 1000., label="Channel 2")
        mp.rcParams.update({'font.size': 20})
        mp.locator_params(axis='y', nticks=3)
        mp.yticks()
        mp.xlabel("Samples (4000 per second)")
        mp.legend(handler_map={line1: HandlerLine2D(numpoints=4)})
        mp.ylabel("Milli volts")
        mp.show()
    m1_array = [signal1]
    m2_array = [signal2]
    """
    One in every test_fraction samples goes to the test set and one to the validation
    set; the remaining (test_fraction - 2) go to training.
    """
    test_data = (count % test_fraction) == 1
    validation_data = (count % test_fraction) == 2

    if not test_data and not validation_data:
        """
        Data augmentation step: augmentation is applied only to the training data, never to the test or validation sets.
        """
        for noise in noises:
            for coefficient in noise_coefficients:
                # Add noise to the signal
                m1_array.append(add_noise(signal1, float(coefficient), noise))
                m2_array.append(add_noise(signal2, float(coefficient), noise))

        if apply_shift:
            # SHIFT RIGHT
            m1_array.append(np.roll(signal1, int(len(signal1) * 0.05)))
            m2_array.append(np.roll(signal2, int(len(signal2) * 0.05)))

            # SHIFT LEFT
            m1_array.append(np.roll(signal1, int(len(signal1) * 0.05 * -1)))
            m2_array.append(np.roll(signal2, int(len(signal2) * 0.05 * -1)))

        if apply_stretch:
            # Stretch the signal
            m1_array.append(stretch(signal1, 0.8))
            m2_array.append(stretch(signal2, 0.8))

            m1_array.append(stretch(signal1, 1.2))
            m2_array.append(stretch(signal2, 1.2))

    for i in range(len(m1_array)):
        """
        Convert the original and augmented data to their MFCC representations.
        """
        signal1 = m1_array[i]
        signal2 = m2_array[i]

        if len(signal1) < 1:
            continue

        m1 = mfcc(signal1,
                  samplerate=sampling_frequency,
                  numcep=numcep,
                  nfilt=nfilt,
                  highfreq=highfreq)
        m2 = mfcc(signal2,
                  samplerate=sampling_frequency,
                  numcep=numcep,
                  nfilt=nfilt,
                  highfreq=highfreq)

        if i == 0:
            lengths.append(len(m1))

        if show_plot:
            print(len(m1), len(m1[0]))

        if apply_interpolation:
            # Use interpolation to make the converted data have the same sizes
            m1 = interpolate(m1, new_length)
            m2 = interpolate(m2, new_length)

        if apply_padding:
            # Use padding to make the converted data have the same sizes
            m1 = pad(m1, new_length, True)
            m2 = pad(m2, new_length, True)

        if apply_stack:
            # Stack the signals to make them comparable to multi-channel inputs to the models. Helpful for CNNs.
            signal = np.dstack((m1, m2))
        else:
            m1 = m1.T
            m2 = m2.T
            signal = np.concatenate((m1, m2))
            signal = signal.T
            signal = normalize(signal, axis=0)  # keep the result: normalize (assumed to be sklearn's) returns a new array

        if show_plot:
            """
            Show converted MFCC representations
            """
            if i == 1:
                fig, ax = mp.subplots()
                mfcc_data = np.swapaxes(m1, 0, 1)
                ax.imshow(mfcc_data, interpolation='nearest', origin='lower')
                ax.set_title('MFCC Representation of Channel 1')
                mp.xlabel("Time Frames")
                mp.ylabel("Mel Bands")
                mp.show()

                fig, ax = mp.subplots()
                mfcc_data = np.swapaxes(m2, 0, 1)
                ax.imshow(mfcc_data, interpolation='nearest', origin='lower')
                ax.set_title('MFCC Representation of Channel 2')
                mp.xlabel("Time Frames")
                mp.ylabel("Mel Bands")
                mp.show()
        """
        Round the converted signals to 4 decimal places only. Made unreachable for now.
        """
        if False:
            signal = np.round(signal, decimals=4)
        """
        Save the files
        """
        data = {"label": labels.index(label), "signal": signal}

        if not os.path.exists(destination):
            os.makedirs(destination)
        sub_destination = destination + subject + "/"
        if not os.path.exists(sub_destination):
            os.makedirs(sub_destination)
        if not os.path.exists(sub_destination + label):
            os.makedirs(sub_destination + label)
        if not os.path.exists(sub_destination + label + "/test"):
            os.makedirs(sub_destination + label + "/test")
        if not os.path.exists(sub_destination + label + "/validation"):
            os.makedirs(sub_destination + label + "/validation")

        if test_data:
            file_location = "{}{}/test/{}_{}_{}.hdf5".format(
                sub_destination, label, uuid, count, i)
        elif validation_data:
            file_location = "{}{}/validation/{}_{}_{}.hdf5".format(
                sub_destination, label, uuid, count, i)
        else:
            file_location = "{}{}/{}_{}_{}.hdf5".format(
                sub_destination, label, uuid, count, i)

        with h5py.File(file_location, 'w') as f:
            for k, v in data.items():
                f.create_dataset(k, data=np.array(v, dtype=float))
    return
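Here interpolate(m, new_length) serves the same purpose as in the previous example, except that the input is a two-dimensional MFCC matrix rather than a single column: every matrix is resampled to the same number of time frames so the downstream models receive fixed-size inputs. A rough sketch of such a step, resampling each cepstral coefficient independently along the time axis (hypothetical helper, not this project's actual code):

import numpy as np

def interpolate(mfcc_matrix, new_length):
    # Hypothetical sketch: resample an (n_frames, numcep) MFCC matrix to
    # (new_length, numcep) by linear interpolation along the time axis.
    mfcc_matrix = np.asarray(mfcc_matrix, dtype=float)
    old_x = np.linspace(0.0, 1.0, num=mfcc_matrix.shape[0])
    new_x = np.linspace(0.0, 1.0, num=int(new_length))
    return np.column_stack([np.interp(new_x, old_x, mfcc_matrix[:, c])
                            for c in range(mfcc_matrix.shape[1])])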
Code Example #4
def splitQuery(self, numImages, minTime, maxTime, splitAttempts):
    # Split the time range into enough chunks that each should return at most
    # config.targetPhotosPerPage photos, then query each sub-range.
    targetNumChunks = math.ceil(numImages / config.targetPhotosPerPage)
    for chunk in range(0, targetNumChunks):
        newMin = helper.interpolate(chunk, targetNumChunks, minTime, maxTime)
        newMax = helper.interpolate(chunk + 1, targetNumChunks, minTime, maxTime)
        self.queryTimeRange(newMin, newMax, splitAttempts + 1)