コード例 #1
0
def main():
    """Align one subtitle file against a Google and an Amazon transcript.

    The three input paths are fixed in the function body. Each transcript is
    prepared with ``get_text_ready``, warped against the prepared subtitle
    text with ``fastdtw`` (using ``edit_distance`` as the element distance),
    and the resulting warping path is passed to ``find_timings_for_words``
    and then to ``output_timings``.

    Takes no arguments and returns nothing; progress is reported with
    ``print``.
    """
    for banner_line in ("Gill Bates vs Beff Jezos", "Who's going to win?"):
        print(banner_line)

    # Input paths are fixed here; interactive prompts are intentionally not
    # used.
    subtitle_file, transcript_file, amazon_file = (
        "500_days.srt",
        "500_days_transcribed_audio.pb",
        "500_days_amazon.json",
    )

    # Load the raw data.
    transcript_collection = load_proto_file(transcript_file)
    subs = parse_subs(subtitle_file)
    amazon_text, amazon_timings = amazon_transcript(amazon_file)
    audiobits = transcript_collection.audiobits
    google_timings = get_time_transcript(audiobits)

    # Bring all three texts into the form expected by the aligner.
    subs_align = get_text_ready(subs, False)
    tran_google = get_text_ready(audiobits, False)
    tran_amazon = get_text_ready(amazon_text, True)

    print("Starting the alignment...")
    # Only the warping path (last element of the 4-tuple) is used below.
    _, _, _, google_path = fastdtw(
        np.array(subs_align), np.array(tran_google), edit_distance)
    print("Finished warping ONE")
    _, _, _, amazon_path = fastdtw(
        np.array(subs_align), np.array(tran_amazon), edit_distance)
    print("Finished warping TWO. Starting aligning...")

    # Walk each warping path to obtain the word-to-word pairings.
    google_path_pairs = find_timings_for_words(google_path)
    amazon_path_pairs = find_timings_for_words(amazon_path)

    output_timings("google", google_path_pairs, subs_align, tran_google,
                   google_timings)
    output_timings("amazon", amazon_path_pairs, subs_align, tran_amazon,
                   amazon_timings)
コード例 #2
0
    def cluster(self,
                res,
                n_cluster,
                by_feature,
                method='kmeans',
                normalized=False,
                **kwargs):
        """Cluster stations by one feature and file their plots per cluster.

        Args:
            res: Table whose index has the station as its first level and
                which exposes a ``Factor`` column plus the column(s) selected
                by ``by_feature``.
            n_cluster: Number of clusters to produce.
            by_feature: Column to cluster on, or ``"Combined"`` to use the
                flattened ``Checkout`` and ``Return`` columns together.
            method: ``'kmeans'`` or ``'agglomerative'``.
            normalized: When true, each series is divided by its maximum
                absolute value before clustering.
            **kwargs: Only ``linkage`` is read (default ``'average'``); it is
                forwarded to ``agg_cluster``.

        Returns:
            ``(names, labels)``: the stations that were kept and the cluster
            label assigned to each of them, in matching order.

        Raises:
            ValueError: If ``method`` is not a supported clustering method.
        """
        # Fail before any work or filesystem change; previously an unknown
        # method surfaced as a NameError on ``labels`` only after the output
        # directory had already been deleted and recreated.
        if method not in ('kmeans', 'agglomerative'):
            raise ValueError(
                "Unknown clustering method {!r}; expected 'kmeans' or "
                "'agglomerative'".format(method))

        stations = list(set(res.index.get_level_values(0)))
        series, names = [], []

        def transform(x):
            return x / np.max(np.abs(x)) if normalized else x

        def feature(data):
            if by_feature == "Combined":
                return data[["Checkout", "Return"]].values.flatten()
            return data[by_feature]

        for st in stations:
            data = res.loc[st]
            if len(data) < 24:
                data = self.clean_record(data)
            # The Factor check reads the raw record, not the cleaned one.
            if res.loc[st].Factor.iloc[0] < 30:
                continue
            values = feature(data)
            # Skip stations with missing values or with no activity at all.
            if np.isnan(values).any() or (values == 0).all():
                continue
            series.append(transform(values))
            names.append(st)

        if method == 'kmeans':
            km = KMeans(init='k-means++', n_clusters=n_cluster)
            labels = km.fit_predict(np.array(series))
        else:  # 'agglomerative'
            n = len(series)
            # Symmetric pairwise DTW distance matrix; the diagonal stays 0.
            simmat = np.zeros((n, n))
            for i in range(n):
                for j in range(i + 1, n):
                    simmat[i, j], _, _, _ = fastdtw(
                        series[i], series[j], dist=lambda x, y: np.abs(x - y))
                    simmat[j, i] = simmat[i, j]
            labels = agg_cluster(simmat,
                                 n_cluster,
                                 linkage=kwargs.get('linkage', 'average'))

        path_name = '{}_{}_{}'.format(method, self.bikesystem.city, n_cluster)
        if normalized:
            path_name += "_normalized"
        base_dir = os.path.join('clusters', path_name)
        # Start from a clean output tree on every run.
        if os.path.exists(base_dir):
            rmtree(base_dir)
        from_dir = os.path.join('temporal_deltas',
                                '{}_Hourly'.format(self.bikesystem.city))
        for i in range(n_cluster):
            os.makedirs(os.path.join(base_dir, str(i)))
        # Copy each station's existing PDF into the folder of its cluster.
        for name, label in zip(names, labels):
            file_name = fname(name) + ".pdf"
            copyfile(os.path.join(from_dir, file_name),
                     os.path.join(base_dir, str(label), file_name))
        return names, labels
コード例 #3
0
 def predict(self, x_test):
     """Return the nearest training samples for every test sample.

     For each entry of ``x_test`` a distance to every entry of ``self.X``
     is obtained from ``fastdtw`` (called with ``self.dist``). The
     distances are optionally passed through ``self.norm`` and the
     ``self.neighbours`` smallest ones are selected.

     Returns a list with one ``(labels, distances)`` pair per test sample,
     where both elements are indexed from ``self.y`` / the distance vector
     by the selected positions.
     """
     predictions = []
     for test_pos in range(len(x_test)):
         distances = np.zeros(len(self.y))
         for train_pos in range(len(self.X)):
             # NOTE(review): any failure here (including ``fastdtw``
             # returning a tuple that cannot be stored in a float slot) is
             # only printed, and the distance for this pair stays 0 --
             # confirm this best-effort behaviour is intended.
             try:
                 distances[train_pos] = fastdtw(self.X[train_pos],
                                                x_test[test_pos],
                                                self.dist)
             except Exception as exc:
                 for debug_item in (self.y[train_pos], self.X[train_pos],
                                    x_test[test_pos], exc):
                     print(debug_item)
         if self.normalize:
             distances = self.norm(distances)
         nearest = distances.argsort()[:self.neighbours]
         predictions.append((self.y[nearest], distances[nearest]))
     return predictions
コード例 #4
0
                                np.maximum(x, softphoc_query)) / len(qword)
                            for x in resized_preds
                        ])

                    elif similarity_type == 4:  #dtw
                        resized_preds = [
                            (cv2.resize(x, (target_size[0], target_size[1])))
                            for x in softphoc_proposal_preds
                        ]  ## for axes oriented boxes
                        # resized_preds = [warped_logits] ## for warped non axes oriented boxes

                        # chars = [char2int[x] for x in qword]
                        # similarities = -np.mean([fastdtw(softphoc_query[:, :, x].transpose(), resized_preds[0][:, :, x].transpose(), 'euclidean')[0] for x in chars])
                        similarities = -np.mean([
                            fastdtw(softphoc_query[:, :, x].transpose(),
                                    resized_preds[0][:, :, x].transpose(),
                                    'euclidean')[0] for x in range(38)
                        ])

                    else:
                        raise Exception('Unknown similarity type')

                    sorted_idx = np.argsort(similarities)

                    if take_always_argmax:
                        idx = sorted_idx[-1]
                    else:
                        # idx = sorted_idx[-query_dict[qword]]
                        idx = sorted_idx[np.maximum(-query_dict[qword],
                                                    -len(sorted_idx))]
コード例 #5
0
                                np.maximum(x, softphoc_query)) / len(qword)
                            for x in resized_preds
                        ])

                    elif similarity_type == 4:  #dtw
                        similarities = []
                        im = np.copy(img)
                        for line in houghTransform_proposals:
                            cv2.line(im, (line[0], line[1]),
                                     (line[2], line[3]), (0, 255, 0), 2)
                            pred_points_array = get_points_lines(
                                line, probabilities_np)
                            # resized_preds = [(cv2.resize(x, (target_size[0]))) for x in pred_points_array]
                            # similarities = -np.mean([fastdtw(softphoc_query[:, :, x].transpose(), resized_preds[0][:, :, x].transpose(), 'euclidean')[0] for x in range(38)])
                            dist, cost, acc, path = fastdtw(
                                np.squeeze(softphoc_query), pred_points_array,
                                'euclidean')
                            similarities.append(-dist)

                        plt.imshow(im)
                        plt.title(qword)
                        plt.show()
                    else:
                        raise Exception('Unknown similarity type')

                    sorted_idx = np.argsort(similarities)

                    if take_always_argmax:
                        idx = sorted_idx[-1]
                    else:
                        # idx = sorted_idx[-query_dict[qword]]
コード例 #6
0
def dtw_metric(x, y):
    """Return the DTW distance between two sequences under the L1 norm.

    Both inputs are reshaped to column vectors before being handed to
    ``fastdtw``; only the first element of its 4-tuple result (the
    distance) is returned.
    """
    def l1_distance(a, b):
        return norm(a - b, ord=1)

    column_x = x.reshape(-1, 1)
    column_y = y.reshape(-1, 1)
    distance, _cost, _acc, _path = fastdtw(column_x, column_y,
                                           dist=l1_distance)
    return distance
コード例 #7
0
            priceLow = sortedSmoothedSample[0][1]
            for j in range(arcLength):
                normalizedSmoothedSample[j][1] = (
                    smoothedSample[j][1] -
                    priceLow) / priceRange * circleRadius

            midpoint = int(arcLength / 2)
            diff = abs(normalizedSmoothedSample[midpoint][1] -
                       template[midpoint][1])

            for j in range(arcLength):
                normalizedSmoothedSample[j][
                    1] = normalizedSmoothedSample[j][1] - diff
                normalizedSmoothedSample[j][0] = j

            acc = fastdtw(normalizedSmoothedSample, template)
            similarity_distance = acc[arcLength - 1][arcLength - 1]
            sim_list.append(similarity_distance)

            if similarity_distance < similarity_threshold * arcLength:

                left = smoothedSample[0][1]
                right = smoothedSample[-1][1]
                slope = abs(left - right) / min(left, right)

                if slope <= horizon_threshold:
                    dateIndex.append(i)

                    # record the pattern found under current time span
                    startday = i
                    endday = i + arcLength - 1
コード例 #8
0
def dtw_worker(i, t_row, df, df_test):
    """Find the trips in ``df`` closest to one test trip and report them.

    The test trajectory is parsed from ``t_row[0]`` and compared with the
    trajectory stored in column position 2 of every row of ``df`` using
    ``dtw.fastdtw`` with ``prep.haversine_dist`` as point distance. The (at
    most) five trips with the smallest accumulated cost are kept. A map of
    the test trip, a map per neighbour and a text report are written under
    ``Maps/dtwMaps/testTrip<i+1>/``.

    Args:
        i: Zero-based index of the test trip; used (as ``i + 1``) in output
            paths.
        t_row: Row of the test trip; element 0 is the trajectory as a Python
            literal string.
        df: Table of candidate trips. Must have a ``tripId`` column; row
            position 0 is read as the trip id, position 1 is written to the
            report as ``JP_ID`` and position 2 holds the trajectory literal.
        df_test: Unused; kept for interface compatibility.

    Returns:
        None. All results are written to files.
    """
    # NOTE(security): the Google Maps API key is hard-coded in the source.
    # It is kept unchanged here so behaviour is identical, but it should be
    # moved to configuration and rotated.
    api_key = 'AIzaSyDf6Dk2_fg0p8XaEhQdFVCXg-AMlm54dAs'
    out_dir = 'Maps/dtwMaps/testTrip' + str(i + 1)

    def point_distance(c1, c2):
        return prep.haversine_dist(c1[0], c1[1], c2[0], c2[1])

    # Each trajectory point is indexed as [?, lon, lat] -- presumably element
    # 0 is a timestamp; only elements 1 and 2 are used.
    t_trajectories = ast.literal_eval(t_row[0])
    tlong = [float(point[1]) for point in t_trajectories]
    tlat = [float(point[2]) for point in t_trajectories]
    test_list = [[lon, lat] for lon, lat in zip(tlong, tlat)]

    # Compare the test trip against every candidate trip.
    neighbours = []
    start = time.time()
    for _, row in df.iterrows():
        trajectories = ast.literal_eval(row[2])
        clean_list = [[float(point[1]), float(point[2])]
                      for point in trajectories]

        # Compute DTW for these trips using Haversine as distance metric.
        _dist, _cost, acc, _path = dtw.fastdtw(
            test_list, clean_list, dist=point_distance)

        # The last cell of the accumulated cost matrix is the total cost.
        neighbours.append([int(row[0]), acc[-1][-1]])
    end = time.time()
    elapsed = float(end - start)

    # reshape(-1, 2) keeps the array two-dimensional even when ``df`` is
    # empty, so the column slice below cannot fail.
    neighbours = np.asarray(neighbours, dtype=float).reshape(-1, 2)
    neighbours = neighbours[neighbours[:, 1].argsort()][:5]

    gmap = gmplot.GoogleMapPlotter(tlat[0], tlong[0], 10, api_key)
    gmap.plot(tlat, tlong, 'green', edge_width=5)
    gmap.draw(out_dir + '/test-' + str(i + 1) + '.html')

    filename = out_dir + '/data' + str(i + 1) + '.txt'
    # 'w' truncates any previous report; the context manager guarantees the
    # handle is closed even if drawing a neighbour map fails.
    with open(filename, 'w') as f:
        # Iterate over the neighbours actually found (may be fewer than 5).
        for n, (trip_id, dtw_cost) in enumerate(neighbours):
            for _, grow in df[df['tripId'] == trip_id].iterrows():
                gtrajectory = ast.literal_eval(grow[2])
                longlist = [float(point[1]) for point in gtrajectory]
                latlist = [float(point[2]) for point in gtrajectory]

                gmap = gmplot.GoogleMapPlotter(
                    latlist[0], longlist[0], 10, api_key)
                gmap.plot(latlist, longlist, 'green', edge_width=5)
                gmap.draw(out_dir + '/neighbour' + str(n + 1) + '-' +
                          str(grow[1]) + '.html')

                f.write("Neighbor %d \nJP_ID: %s \nDTW: %8.5f\n" %
                        (n, grow[1], float(dtw_cost)))
                f.write("dt: %8.5f\n\n" % elapsed)
コード例 #9
0
import dtw
import numpy as np
import matplotlib.pyplot as plt
# from numpy.linalg import norm

# Two short integer ramps, shaped as column vectors for dtw.fastdtw.
x = np.arange(15, 20).reshape(-1, 1)
y = np.arange(5, 10).reshape(-1, 1)


dist, cost, acc, path = dtw.fastdtw(x, y, dist=lambda a, b: abs(a - b))
# Bare expression: shows the distance in an interactive session and has no
# effect when the file is run as a script.
dist

# Accumulated cost matrix with the warping path drawn on top.
plt.imshow(acc.T, origin='lower', cmap=plt.cm.gray, interpolation='nearest')
plt.plot(path[0], path[1], 'w')
plt.xlim(-0.5, acc.shape[0] - 0.5)
plt.ylim(-0.5, acc.shape[1] - 0.5)

# Overlay both input series: x against its index, y with the axes swapped.
plt.plot(np.arange(len(x)), x)
plt.plot(y, np.arange(len(y)))
plt.show()
# This DTW distance seems to change with the length of the time series --
# how should that be handled?
# Experiments show that although it does change, it still converges.
コード例 #10
0
 def dtw_dist(self, other):
     """Return the DTW distance between this object's and ``other``'s coords.

     Both ``coords`` attributes are converted to NumPy arrays and compared
     with ``fastdtw`` using the absolute difference as element distance.
     Only the first element of the 4-tuple result is returned.
     """
     own_coords = np.array(self.coords)
     other_coords = np.array(other.coords)
     distance, _cost, _acc, _path = fastdtw(
         own_coords, other_coords, dist=lambda a, b: abs(a - b))
     return distance
コード例 #11
0
                                  key=lambda sortedPeriod: sortedPeriod[1])
            periodRange = sortedPeriod[lenPeriod - 1][1] - sortedPeriod[0][1]
            for i in range(lenPeriod):
                curPeriod[i][1] = (curPeriod[i][1] - sortedPeriod[0][1]
                                   ) / periodRange * (lenPeriod / 2.0)
            # print(periodRange)
            if startDate == 10 and endDate == 41:
                print(lenPeriod / 2.0)

            midpoint = int(lenPeriod / 2)
            diff = abs(curPeriod[midpoint][1] - curTemplate[midpoint][1])
            for i in range(lenPeriod):
                curPeriod[i][1] = curPeriod[i][1] - diff
                curPeriod[i][0] = i

            acc = fastdtw(curPeriod, curTemplate)
            curSimilarityDist = acc[lenPeriod - 1][lenPeriod - 1]

            if curSimilarityDist <= SIM_THRESHOLD * lenPeriod:
                left = smoothedData[startDate][1]
                right = smoothedData[endDate][1]
                slope = abs(left - right) / min(left, right)
                # if match
                # leftBound = endDate + 1     dateInc = TIMESPAN_MIN    save the arc
                if slope <= CIRCLE_HORIZONTAL_THRESHOLD:
                    distList.append(curSimilarityDist / lenPeriod)
                    dateInc = TIMESPAN_MIN
                    leftBound = endDate + 1
                    pricePatternList.append([startDate, endDate])
                    break
コード例 #12
0
import numpy as np
import matplotlib.pyplot as plt
from dtw import fastdtw
import matplotlib as mpl
from arc import arc_circle_gen
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
import math
from template import template_gen

# Build the two templates to compare and show them.
tem1 = arc_circle_gen(32, 16)
tem2 = template_gen('circle', 32, 16, 1.0)
for template in (tem1, tem2):
    print(template)

# Copy of the first 30 points of tem1 with the second coordinate replaced by
# 15 minus its value.
tem1rev = [[tem1[i][0], 15 - tem1[i][1]] for i in range(30)]

# Print cell [29][29] of the fastdtw result for each variant against tem2.
for candidate in (tem1, tem1rev):
    print(fastdtw(candidate, tem2)[29][29])