def main(arguments):
    '''
    Build one distance database per randomly chosen vantage point.

    The vantage points are sampled without replacement among the time series
    indices, pickled to ts_data/vantage_points.p and, for every vantage point
    i, a database dbs/db_<i>.dbdb is written that stores the distance
    2 * (1 - kernel_corr) between the standardised time series i and every
    other standardised time series j, together with the label 'ts_<j>'.

    Args:
    -----
    - arguments: list of command-line tokens; understands --n (number of
      vantage points, default 20) and --nts (number of time series to load
      from ts_data/ts_<i>.p, default 1000)

    Raises:
    -------
    - ValueError (from random.sample) when --n is larger than --nts
    '''
    # Parse first: a bad flag or --help must not wipe the existing databases.
    parser = argparse.ArgumentParser(
        description='Number of vantage points to generate')
    parser.add_argument('--n',
                        help='Number of vantage points',
                        type=int,
                        default=20)
    parser.add_argument('--nts',
                        help='Number of time series in the database',
                        type=int,
                        default=1000)
    args = parser.parse_args(arguments)
    n = args.n
    nts = args.nts

    # Clean the databases or create the directory. A missing directory is
    # the expected case on a first run, hence the best-effort removal.
    shutil.rmtree('dbs', ignore_errors=True)
    os.mkdir('dbs')

    # Sample the vantage points without replacement
    vantage = random.sample(range(nts), n)
    with open("ts_data/vantage_points.p", "wb") as handle:
        pickle.dump(vantage, handle)
    print('Vantage Points are: ')
    for i in vantage:
        print('TS ' + str(i))

    # Load the time series
    print('Loading the TS')
    tss = []
    for i in range(nts):
        with open("ts_data/ts_{}.p".format(i), "rb") as handle:
            tss.append(pickle.load(handle))

    # Standardise every series once instead of once per vantage point.
    standardised = [stand(ts, ts.mean(), ts.std()) for ts in tss]

    # Store the distance 2 * (1 - k_corr) from each vantage point to every
    # other time series in that vantage point's database
    print('Creating the DBS')
    for ct, i in enumerate(vantage):
        print(str(ct + 1) + '/' + str(len(vantage)) + ' Done')
        db = binarytree.connect('dbs/db_' + str(i) + '.dbdb')
        try:
            for j in range(nts):
                if j == i:
                    continue
                kc = kernel_corr(standardised[i], standardised[j])
                db.set(2 * (1 - kc), 'ts_' + str(j))
            db.commit()
        finally:
            # Release the database handle even if a distance computation fails.
            db.close()
Example #2
0
def _str_to_bool(text):
    '''
    Interpret a command-line token as a boolean.

    argparse's type=bool calls bool() on the raw string, so any non-empty
    token (including "False") becomes True. Here the usual negative
    spellings map to False and every other token stays True, so callers
    that pass "--save True" keep working.
    '''
    return text.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')


def main(arguments):
    '''
    Search the vantage point databases for the time series closest to a
    given one and print or save the result.

    When --n is 1 only the database of the closest vantage point is
    searched; otherwise the candidates found around every vantage point are
    merged and de-duplicated before ranking.

    Args:
    -----
    - arguments: list of command-line tokens; understands the positional
      ts (path of the pickled time series without the ".p" suffix), --n
      (how many neighbours to report, default 1), --save (write the result
      to a file instead of printing it) and --f (output file, default
      'results.p')
    '''
    parser = argparse.ArgumentParser(
        description='Find the time series most similar to a given one')
    parser.add_argument('ts', help="TimeSeries of interest", type=str)
    parser.add_argument('--n', help='Top N similar', type=int, default=1)
    parser.add_argument('--save',
                        help='Save Results',
                        type=_str_to_bool,
                        default=False)
    parser.add_argument('--f',
                        help='Path to results file',
                        type=str,
                        default='results.p')
    args = parser.parse_args(arguments)
    ts_name = args.ts
    n = args.n
    f = args.f
    save = args.save

    with open(ts_name + ".p", "rb") as handle:
        ts = pickle.load(handle)
    ts_stand = util.stand(ts, ts.mean(), ts.std())
    with open("ts_data/vantage_points.p", "rb") as handle:
        vantage = pickle.load(handle)

    if n == 1:
        closest_vantage, closest_distance = eval_closest_vantage(
            ts_stand, vantage)
        closest_in_all = in_radius_from_vantage(ts_stand, closest_vantage[0])
    else:
        closest_in_all = []
        for v in vantage:
            closest_in_all += in_radius_from_vantage(ts_stand, v)
        # The same series can be returned by several vantage points.
        closest_in_all = list(set(closest_in_all))

    return_top(ts_stand, closest_in_all, n, save, f)
 def test_standardisation(self):
     #Standardising a series must agree with (value - mean) / std from numpy
     series = util.ts.ArrayTimeSeries(range(10), range(10))
     standardised = util.stand(series, series.mean(), series.std())
     mean = np.mean(series._value)
     std = np.std(series._value)

     def matches_reference(idx):
         #Compare one standardised sample against the numpy reference value
         gap = standardised._value[idx] - ((series._value - mean) / std)[idx]
         return abs(gap) < 0.000000001

     self.assertTrue(all(map(matches_reference, range(len(series)))))
Example #4
0
def _distance_to_candidate(ts_stand, name, test=False):
    '''
    Load the pickled time series called name, standardise it and return its
    distance 2 * (1 - kernel_corr) to ts_stand.
    '''
    template = "../ts_data/{}.p" if test else "ts_data/{}.p"
    with open(template.format(name), "rb") as handle:
        candidate = pickle.load(handle)
    candidate_stand = util.stand(candidate, candidate.mean(), candidate.std())
    return 2 * (1 - util.kernel_corr(ts_stand, candidate_stand))


def return_top(ts_stand, oglist, n, ret=False, file='', test=False):
    '''
    Prints or saves the n closest timeseries to ts_stand from a list of
    timeseries names (i.e. as string), and returns them.

    Args:
    -----
    - ts_stand: standardised time series to compare against
    - oglist: names of the candidate time series; each is loaded from
      ts_data/<name>.p
    - n: number of neighbours wanted; any value <= 1 reports the single
      closest candidate
    - ret: when False the result is printed, when True it is pickled to file
    - file: output path, only used when ret is True
    - test: when True the candidates are loaded from ../ts_data instead of
      ts_data

    Returns:
    --------
    - n > 1: list of (name, distance) tuples sorted by increasing distance,
      at most n long (candidates at equal distance keep their oglist order)
    - otherwise: the tuple (distance, name) of the closest candidate, or
      (inf, None) when oglist is empty
    '''
    if n > 1:
        distances = [(k, _distance_to_candidate(ts_stand, k, test))
                     for k in oglist]
        topn = sorted(distances, key=lambda x: x[1])[:n]
        if not ret:
            print('Results: \n')
            for k, v in topn:
                print(k + ':', v)
        else:
            with open(file, "wb") as handle:
                pickle.dump(topn, handle)
        return topn

    m = float("inf")
    best = None
    for k in oglist:
        curr_distance = _distance_to_candidate(ts_stand, k, test)
        if m > curr_distance:
            m = curr_distance
            best = k
    if not ret:
        # str() keeps the report printable when no candidate was supplied.
        print('Closest: ' + str(best) + ', Distance: ' + str(m))
    else:
        with open(file, "wb") as handle:
            pickle.dump([m, best], handle)
    return m, best
Example #5
0
def sanity(ts_stand, n, nts=1000):
    '''
    Sanity check used in the testing file: brute force n closest.

    Args:
    -----
    - ts_stand: standardised time series to compare against
    - n: number of neighbours to return
    - nts: number of time series ../ts_data/ts_<i>.p to scan (default 1000)

    Returns:
    --------
    - list of at most n tuples ('ts_data/ts_<i>.p', distance) sorted by
      increasing distance, where distance is 2 * (1 - kernel_corr)
    '''
    ds = []
    for i in range(nts):
        with open("../ts_data/ts_{}.p".format(i), "rb") as handle:
            v = pickle.load(handle)
        v_stand = util.stand(v, v.mean(), v.std())
        ds.append(('ts_data/ts_' + str(i) + '.p',
                   2 * (1 - util.kernel_corr(ts_stand, v_stand))))
    return sorted(ds, key=lambda d: d[1])[:n]
 def test_search_closest(self):
     #Checks that we can correctly report the closest point in the DB
     with open("../ts_data/ts_0.p", "rb") as handle:
         ts = pickle.load(handle)
     ts_stand = util.stand(ts, ts.mean(), ts.std())
     with open("../ts_data/vantage_points.p", "rb") as handle:
         vantage = pickle.load(handle)
     closest_vantage, closest_distance = simsearch.eval_closest_vantage(ts_stand, vantage, True)
     closest_in_all = simsearch.in_radius_from_vantage(ts_stand, closest_vantage[0], True)
     #return_top's fifth positional parameter is `file`, so `test` is passed
     #by keyword to make it read the candidates from ../ts_data
     top_m, top_b = simsearch.return_top(ts_stand, closest_in_all, 1, False, test=True)
     #Brute-force reference: the single closest series over the whole data set
     san = simsearch.sanity(ts_stand, 1)[0]
     self.assertEqual('ts_data/'+top_b+'.p', san[0])
 def test_search_topn(self):
     #Checks that we can correctly report the n closest points in the DB
     with open("../ts_data/ts_0.p", "rb") as handle:
         ts = pickle.load(handle)
     ts_stand = util.stand(ts, ts.mean(), ts.std())
     with open("../ts_data/vantage_points.p", "rb") as handle:
         vantage = pickle.load(handle)
     closest_in_all = []
     for v in vantage:
         closest_in_all += simsearch.in_radius_from_vantage(ts_stand, v, True)
     #The same series can be reported by several vantage points
     closest_in_all = list(set(closest_in_all))
     #return_top's fifth positional parameter is `file`, so `test` is passed
     #by keyword to make it read the candidates from ../ts_data
     te = simsearch.return_top(ts_stand, closest_in_all, 10, False, test=True)
     san = simsearch.sanity(ts_stand, 10)
     #Comparing whole lists reports the mismatching names on failure and
     #fails (instead of raising IndexError) when fewer results come back
     found = ['ts_data/' + item[0] + '.p' for item in te]
     expected = [item[0] for item in san]
     self.assertEqual(found, expected)
Example #8
0
def in_radius_from_vantage(ts_stand, v_str, test=False):
    '''
    Return the entries of a vantage point's database that lie within twice
    the distance between ts_stand and that vantage point.

    The distance is d = 2 * (1 - kernel_corr(ts_stand, vantage_stand)) and
    the database is queried with db.get_closer_than(2 * d); the second
    element of every returned item is collected.

    Args:
    -----
    - ts_stand: standardised ArrayTimeSeries
    - v_str: index of the vantage point (anything str.format can render,
      e.g. an int or a string)
    - test: when True the data and databases are read from ../ts_data and
      ../dbs instead of ts_data and dbs
    '''
    prefix = "../" if test else ""
    with open(prefix + "ts_data/ts_{}.p".format(v_str), "rb") as handle:
        v = pickle.load(handle)
    v_stand = util.stand(v, v.mean(), v.std())
    d = 2 * (1 - util.kernel_corr(ts_stand, v_stand))
    db = binarytree.connect(prefix + 'dbs/db_{}.dbdb'.format(v_str))
    try:
        return [x[1] for x in db.get_closer_than(2 * d)]
    finally:
        # Release the database handle even if the query fails.
        db.close()
Example #9
0
def eval_closest_vantage(ts_stand, vantage, test=False):
    '''
    Return the closest vantage point and its distance to ts_stand.

    Args:
    -----
    - ts_stand: standardised ArrayTimeSeries
    - vantage: list of indices of vantage points
    - test: when True the vantage points are read from ../ts_data instead
      of ts_data

    Returns:
    --------
    - (closest_vantage, closest_distance) where closest_vantage is the tuple
      (index, path of its pickle file) and closest_distance is
      2 * (1 - kernel_corr); (None, inf) when vantage is empty. The first
      vantage point wins when several are equally close.
    '''
    template = "../ts_data/ts_{}.p" if test else "ts_data/ts_{}.p"
    closest_vantage = None
    closest_distance = float("inf")
    for i in vantage:
        path = template.format(i)
        with open(path, "rb") as handle:
            v = pickle.load(handle)
        v_stand = util.stand(v, v.mean(), v.std())
        curr_distance = 2 * (1 - util.kernel_corr(ts_stand, v_stand))
        if curr_distance < closest_distance:
            closest_vantage = (i, path)
            closest_distance = curr_distance
    return closest_vantage, closest_distance