Example #1
0
def encode_lightcurve_twed(cluster_lcs, cluster_times, patches_lcs, patches_times, alpha=1, split=True):
    """Encode patches by their TWED distance to every cluster centroid.

    A (num_patches x num_centroids) distance matrix is built with `twed`,
    then each row is rewritten as ``alpha * row_mean - distance`` and
    clamped at zero.

    Parameters
    ----------
    cluster_lcs, cluster_times : indexable sequences
        Centroid j is the pair ``cluster_lcs[j], cluster_times[j]``.
    patches_lcs, patches_times : indexable sequences
        Patch i is the pair ``patches_lcs[i], patches_times[i]``.
    alpha : number, optional
        Multiplier applied to each row mean before the distances are
        subtracted from it.
    split : bool, optional
        If True, return the positive part and the magnitude of the negative
        part side by side (twice as many columns). If False, return only
        the positive part.

    Returns
    -------
    pandas.DataFrame
        One row per patch.

    Notes
    -----
    `twed`, `twed_lambda` and `twed_nu` are module-level names that are not
    defined inside this function.
    """
    num_patches = len(patches_lcs)
    num_centroids = len(cluster_lcs)
    D = np.zeros((num_patches, num_centroids))

    # `range` keeps the function usable on both Python 2 and Python 3.
    for i in range(num_patches):
        # The patch is the same for the whole inner loop: look it up once.
        A, A_times = patches_lcs[i], patches_times[i]
        for j in range(num_centroids):
            B, B_times = cluster_lcs[j], cluster_times[j]
            D[i, j] = twed(A, A_times, B, B_times, lam=twed_lambda, nu=twed_nu)

    # Transform into cluster-distance space.
    df = pd.DataFrame(D)
    mean = df.mean(axis=1)  # mean of each row, i.e. one value per patch
    df = df.div(-1)
    df = df.add(alpha * mean, axis=0)  # every entry is now alpha*mean - x
    if split:
        # Positive part: negatives are set to 0.
        df1 = df.apply(lambda x: np.maximum(0, x))
        # Negative part as magnitudes: positives are set to 0.
        df2 = df.apply(lambda x: np.maximum(0, -x))
        # Both halves share the same column labels, so the index-on-index
        # merge relies on pandas' default suffixes to tell them apart.
        df = df1.merge(df2, left_index=True, right_index=True)
    else:
        # Positive part only: negatives are set to 0.
        df = df.apply(lambda x: np.maximum(0, x))
    return df
Example #2
0
def pairwise_tweds(pairwise_tweds_file, lcs, times, part, num_parts,
                   lam=0.5, nu=1e-5):
    """
    Compute one job's share of the symmetric pairwise TWED matrix.

    For parallelization across many jobs, the N*(N + 1)/2 pairwise distances
    are numbered with a single linear index and cut into `num_parts`
    contiguous slices. This call computes slice number `part`; the last
    slice also absorbs the remainder of the integer division.

    Parameters
    ----------
    pairwise_tweds_file : object
        Not used by the current implementation; kept so existing callers
        keep working.
    lcs, times : indexable sequences
        Item i is the pair ``lcs[i], times[i]``; N is ``len(lcs)``.
    part : int
        Slice to compute, counted from 1 up to `num_parts`.
    num_parts : int
        Total number of slices.
    lam, nu : float, optional
        Forwarded to `twed`.

    Returns
    -------
    numpy.ndarray
        N x N array that is zero everywhere except at the pairs handled by
        this job; each computed value is stored at both [i, j] and [j, i].
    """
    N = len(lcs)
    # Floor division keeps every index an integer regardless of whether
    # true division is in effect.
    num_tweds = N*(N + 1)//2
    tweds_per_part = num_tweds//num_parts
    begin_index = (part - 1)*tweds_per_part
    if part == num_parts:
        end_index = num_tweds
    else:
        end_index = tweds_per_part*part
    # Parenthesised single-argument prints emit the same text on Python 2
    # and Python 3.
    print("N: {0}".format(N))
    print("num_tweds: {0}".format(num_tweds))
    print("tweds_per_part: {0}".format(tweds_per_part))
    print("begin_index: {0}".format(begin_index))
    print("end_index: {0}".format(end_index))
    print("part: {0}".format(part))
    print("num_parts: {0}".format(num_parts))
    D = np.zeros((N, N))
    # A plain counter walks the slice without building an index list and
    # without depending on the Python-2-only `xrange`.
    current_twed = begin_index
    while current_twed < end_index:
        # NOTE(review): the + 1 suggests get_coords expects a 1-based linear
        # index -- confirm against its definition.
        i, j = get_coords(N, current_twed + 1)
        twed_val = twed(lcs[i], times[i], lcs[j], times[j], lam=lam, nu=nu)
        complex_coeff = complexity_coeff(lcs[i], times[i], lcs[j], times[j])
        final_val = twed_val*complex_coeff
        # The matrix is symmetric, so one computation fills both entries.
        D[i, j] = final_val
        D[j, i] = final_val
        current_twed += 1
    return D