Example #1

    # Name of the files that will be saved
    tag_name = args.tag

    # ----------------
    # DATA PIPELINE.
    # ----------------
    # 1. Get the .NC files which contain the weather forecasts from
    # 11 different global numerical weather prediction models. Average
    # the data spatially.
    print "Make or grab the training data, averaging spatially..."
    trainX, trainY, testX = assemble_data(tag_name, meth, debug, nclose,
                                          station_info, model_num,
                                          train_gefs_files, test_gefs_files)
    print('Training sizes:')
    print(trainX.shape, trainY.shape)

    # 2. Normalize the features, use both train and testing
    # data for the feature normalization to fully encompass the
    # range of X values.
    print "Normalizing data..."
    trainX, trainY, testX, xcoeff, ycoeff = \
        normalize_data(trainX, trainY, testX)
    # 3. Fit the data. Here we use our supervised learning model
    # to find statistically significant correlations between the features
    # (the 12, 15, 18, 21, 24 hours ahead weather forecast) and the prediction
    # variable (the actual total solar energy produced at a given Mesonet
    # station).
    print "Fitting data..."
    fit_data(trainX, trainY, num_rand, ycoeff, testX, tag_name, True)
    print('Finished.')
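
normalize_data is a project-specific helper whose body is not shown on this page. As a reference point only, here is a minimal sketch matching the five-value call above, assuming (an assumption, not the project's actual code) that it scales the features over the combined train/test range, as the step-2 comment describes:

import numpy as np

def normalize_data(trainX, trainY, testX):
    # Hypothetical sketch: scale X columns by the combined train/test maxima
    # and Y by its maximum, returning the coefficients so that predictions
    # can be un-scaled later.
    allX = np.vstack([trainX, testX])
    xcoeff = np.abs(allX).max(axis=0)
    xcoeff[xcoeff == 0] = 1.0  # avoid division by zero for constant features
    ycoeff = np.abs(trainY).max()
    return trainX / xcoeff, trainY / ycoeff, testX / xcoeff, xcoeff, ycoeff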
Example #2

import math

def sampen2(data, mm=2, r=0.2, normalize=False):
    """
    Calculates an estimate of sample entropy and the variance of the estimate.

    :param data: The data set (time series) as a list of floats.
    :type data: list

    :param mm: Maximum length of epoch (subseries).
    :type mm: int

    :param r: Tolerance. Typically 0.1 or 0.2.
    :type r: float

    :param normalize: Normalize such that the mean of the input is 0 and
    the sample variance is 1.
    :type normalize: bool

    :return: List[(Int, Float/None, Float/None)...]

    Where the first (Int) value is the Epoch length.
    The second (Float or None) value is the SampEn.
    The third (Float or None) value is the Standard Deviation.

    The outputs are the sample entropies of the input for all epoch lengths
    from 0 to the specified maximum length, mm.

    If there are no matches (the data set is unique) the sample entropy and
    standard deviation will return None.

    :rtype: list
    """

    n = len(data)

    if n == 0:
        raise ValueError("Parameter `data` contains an empty list")

    if mm > n / 2:
        raise ValueError(
            "Maximum epoch length of %d too large for time series of length "
            "%d (mm > n / 2)" % (
                mm,
                n,
            ))

    mm += 1

    mm_dbld = 2 * mm

    if mm_dbld > n:
        raise ValueError(
            "Maximum epoch length of %d too large for time series of length "
            "%d ((mm + 1) * 2 > n)" % (
                mm - 1,
                n,
            ))

    if normalize:
        data = normalize_data(data)

    # initialize the lists
    run = [0] * n
    run1 = run[:]

    r1 = [0] * (n * mm_dbld)
    r2 = r1[:]
    f = r1[:]

    f1 = [0] * (n * mm)
    f2 = f1[:]

    k = [0] * ((mm + 1) * mm)

    a = [0] * mm
    b = a[:]
    p = a[:]
    v1 = a[:]
    v2 = a[:]
    s1 = a[:]
    n1 = a[:]
    n2 = a[:]

    for i in range(n - 1):
        nj = n - i - 1
        y1 = data[i]

        for jj in range(nj):
            j = jj + i + 1

            if data[j] - y1 < r and y1 - data[j] < r:
                run[jj] = run1[jj] + 1
                m1 = mm if mm < run[jj] else run[jj]

                for m in range(m1):
                    a[m] += 1
                    if j < n - 1:
                        b[m] += 1
                    f1[i + m * n] += 1
                    f[i + n * m] += 1
                    f[j + n * m] += 1

            else:
                run[jj] = 0

        for j in range(mm_dbld):
            run1[j] = run[j]
            r1[i + n * j] = run[j]

        if nj > mm_dbld - 1:
            for j in range(mm_dbld, nj):
                run1[j] = run[j]

    for i in range(1, mm_dbld):
        for j in range(i - 1):
            r2[i + n * j] = r1[i - j - 1 + n * j]
    for i in range(mm_dbld, n):
        for j in range(mm_dbld):
            r2[i + n * j] = r1[i - j - 1 + n * j]
    for i in range(n):
        for m in range(mm):
            ff = f[i + n * m]
            f2[i + n * m] = ff - f1[i + n * m]
            k[(mm + 1) * m] += ff * (ff - 1)
    m = mm - 1
    while m > 0:
        b[m] = b[m - 1]
        m -= 1
    b[0] = float(n) * (n - 1.0) / 2.0
    for m in range(mm):
        # Guard against division by zero when there were no matches (b[m] == 0).
        if b[m] == 0:
            p[m] = 0.0
            v2[m] = 0.0
        else:
            p[m] = float(a[m]) / float(b[m])
            v2[m] = p[m] * (1.0 - p[m]) / b[m]
    for m in range(mm):
        d2 = m + 1 if m + 1 < mm - 1 else mm - 1
        for d in range(d2):
            for i1 in range(d + 1, n):
                i2 = i1 - d - 1
                nm1 = f1[i1 + n * m]
                nm3 = f1[i2 + n * m]
                nm2 = f2[i1 + n * m]
                nm4 = f2[i2 + n * m]
                # if r1[i1 + n * j] >= m + 1:
                #    nm1 -= 1
                # if r2[i1 + n * j] >= m + 1:
                #    nm4 -= 1
                for j in range(2 * (d + 1)):
                    if r2[i1 + n * j] >= m + 1:
                        nm2 -= 1
                for j in range(2 * d + 1):
                    if r1[i2 + n * j] >= m + 1:
                        nm3 -= 1
                k[d + 1 + (mm + 1) * m] += float(2 * (nm1 + nm2) * (nm3 + nm4))

    n1[0] = float(n * (n - 1) * (n - 2))
    for m in range(mm - 1):
        for j in range(m + 2):
            n1[m + 1] += k[j + (mm + 1) * m]
    for m in range(mm):
        for j in range(m + 1):
            n2[m] += k[j + (mm + 1) * m]

    # calculate standard deviation for the set
    for m in range(mm):
        v1[m] = v2[m]
        # Guard against division by zero when b[m] is zero.
        if b[m] == 0:
            dv = 0.0
        else:
            dv = (n2[m] - n1[m] * p[m] * p[m]) / (b[m] * b[m])
        if dv > 0:
            v1[m] += dv
        s1[m] = math.sqrt(v1[m])

    # assemble and return the response
    response = []
    for m in range(mm):
        if p[m] == 0:
            # Infimum, the data set is unique, there were no matches.
            response.append((m, None, None))
        else:
            response.append((m, -math.log(p[m]), s1[m]))
    return response
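
A quick usage sketch for sampen2; the series below is arbitrary illustration data:

if __name__ == '__main__':
    series = [1.0, 2.0, 1.5, 3.0, 2.5, 1.0, 2.0, 1.5, 3.0, 2.5, 1.2, 2.1]
    # One (epoch length, SampEn, SD) tuple per epoch length 0..mm.
    for epoch, sampen, sd in sampen2(series, mm=2, r=0.2):
        print(epoch, sampen, sd)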
Example #3

  pose0_inv = np.linalg.inv(poses[0])

  # for KITTI dataset, we need to convert the provided poses 
  # from the camera coordinate system into the LiDAR coordinate system  
  poses_new = []
  for pose in poses:
    poses_new.append(T_velo_cam.dot(pose0_inv).dot(pose).dot(T_cam_velo))
  poses = np.array(poses_new)

  # generate overlap and yaw ground truth array
  # ground_truth_mapping = com_function_angle(scan_paths, poses, frame_idx=0)
  funcangle_file = '/home/cel/CURLY/code/DockerFolder/data/kitti/sequences/07_overlap/preprocess_data_demo/07_all.csv'
  ground_truth_mapping = read_function_angle_com_yaw(scan_paths, poses, funcangle_file)
  
  # normalize the distribution of ground truth data
  dist_norm_data = normalize_data(ground_truth_mapping)
  
  # split ground truth for training and validation
  train_data, validation_data = split_train_val(dist_norm_data)
  
  # add sequence label to the data and save them as npz files
  seq_idx = '07'
  # specify the goal folder
  dst_folder = os.path.join(dst_folder, 'ground_truth_2')
  try:
    os.stat(dst_folder)
    print('generating ground truth data in: ', dst_folder)
  except OSError:
    print('creating new ground truth folder: ', dst_folder)
    os.mkdir(dst_folder)
    
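split_train_val is another helper whose source is not shown; one plausible sketch (an assumption about its behavior) that shuffles the normalized ground truth rows and splits them 90/10:

import numpy as np

def split_train_val(data, train_fraction=0.9):
    # Hypothetical sketch: shuffle row indices, then split into
    # training and validation subsets.
    data = np.asarray(data)
    idx = np.random.permutation(len(data))
    n_train = int(len(data) * train_fraction)
    return data[idx[:n_train]], data[idx[n_train:]]
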
Example #4

import numpy as np
import soundfile as sf


def process_training_data(directory, n_chunks, cut_attack_conf):
    file_data = process_file_names(directory)
    training_data = [[0] * 2 for _ in range(len(file_data))]
    onset_length_ms = 100

    for i in range(len(file_data)):
        print("{:d} of {:d}", i, len(file_data))
        filename = file_data[i][2]

        try:
            data, sample_rate = sf.read(filename)
        except RuntimeError:
            print("Cannot open file {}".format(filename))
            exit()

        if cut_attack_conf:
            only_attack, without_attack = cut_attack(filename, onset_length_ms)

            if cut_attack_conf == "only_attack":
                data = only_attack
            elif cut_attack_conf == "without_attack":
                data = without_attack
            else:
                print("cut_attack_conf Error! Must be \"only_attack\" or \"without_attack\"")
                exit()

        n_samples = len(data)

        # do Fourier Transform
        y_fft = abs(np.fft.fft(data))
        y_fft = y_fft[:n_samples // 2]
        f = sample_rate * np.arange(n_samples) / n_samples
        f = f[1:round(1000 * n_samples / sample_rate)]

        tone = file_data[i][1]
        frequency_diff = frequency_difference(tone)
        shifted_fft = shift_frequency(y_fft, frequency_diff, sample_rate, n_samples)

        shifted_fft = shifted_fft[:round(1000 * n_samples / sample_rate)]

        feature_vector = create_feature_vector(shifted_fft, n_chunks)
        feature_vector = normalize_data(feature_vector)

        training_data[i][0] = file_data[i][0]
        training_data[i][1] = feature_vector

    input_data = [row[1] for row in training_data]
    input_data = list(map(list, zip(*input_data)))  # transpose
    label_data = [row[0] for row in training_data]

    instrument_set = set(label_data)
    instrument_value = list(range(len(instrument_set)))
    instrument_label_map = dict(zip(instrument_set, instrument_value))

    mapped_label_data = np.zeros((len(instrument_set), len(label_data)))

    for i in range(len(label_data)):
        instrument = label_data[i]
        mapped_label_data[instrument_label_map[instrument]][i] = 1

    return input_data, mapped_label_data.tolist()
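
A hedged usage sketch; the directory, chunk count, and attack mode below are placeholders, not values from the original project:

import numpy as np

# Hypothetical call: build 50-chunk FFT feature vectors from the files in
# "samples/", keeping only the attack portion of each recording.
inputs, labels = process_training_data("samples/", 50, "only_attack")
X = np.array(inputs).T  # undo the transpose above: one row per file
Y = np.array(labels).T  # one-hot labels, one row per file
print(X.shape, Y.shape)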