Ejemplo n.º 1
0
def main(tseries_fpath, train_fpath, test_fpath, assign_fpath, out_folder):
    """Fit on the training series and save test outputs for fixed values of ``num_pts``.

    Args:
        tseries_fpath: Time-series file passed to ``ioutil.load_series``.
        train_fpath: Second argument to ``ioutil.load_series`` for the
            training set (presumably a selector/index file -- confirm).
        test_fpath: Same, for the test set.
        assign_fpath: Text file read with ``np.genfromtxt`` as integers and
            used as the training labels.
        out_folder: Existing folder that receives one
            ``probs-<num_pts>-pts.dat`` file per evaluated value.
    """
    Xtrain = ioutil.load_series(tseries_fpath, train_fpath)
    Xtest = ioutil.load_series(tseries_fpath, test_fpath)
    y_train = np.genfromtxt(assign_fpath, dtype='i')

    # Only these four checkpoints are evaluated; ``num_pts`` is forwarded
    # to ``fit`` unchanged (presumably the number of leading points used).
    for num_pts in (1, 25, 50, 75):
        probs = fit(Xtrain, y_train, Xtest, num_pts)

        probs_fpath = os.path.join(out_folder, 'probs-%d-pts.dat' % num_pts)
        np.savetxt(probs_fpath, probs)
Ejemplo n.º 2
0
def main(tseries_fpath, train_fpath, test_fpath, assign_fpath, out_folder):
    """Run ``fit`` at a few fixed ``num_pts`` values and store each result.

    Args:
        tseries_fpath: Time-series file passed to ``ioutil.load_series``.
        train_fpath: Second argument to ``ioutil.load_series`` when loading
            the training series (presumably a selector file -- confirm).
        test_fpath: Second argument to ``ioutil.load_series`` when loading
            the test series.
        assign_fpath: Text file holding the training labels; parsed as
            integers by ``np.genfromtxt``.
        out_folder: Folder where ``probs-<num_pts>-pts.dat`` files are
            written; it must already exist.
    """
    Xtrain = ioutil.load_series(tseries_fpath, train_fpath)
    Xtest = ioutil.load_series(tseries_fpath, test_fpath)
    y_train = np.genfromtxt(assign_fpath, dtype='i')

    checkpoints = (1, 25, 50, 75)
    for num_pts in checkpoints:
        probs = fit(Xtrain, y_train, Xtest, num_pts)

        # One output file per checkpoint, named after the value used.
        probs_fpath = os.path.join(out_folder, 'probs-%d-pts.dat' % num_pts)
        np.savetxt(probs_fpath, probs)
Ejemplo n.º 3
0
def run_fold(folder, tseries_fpath, min_pts, thetas, out_folder):
    """Evaluate the training split of one fold and save the results.

    Reads ``train.dat`` and ``ksc/assign.dat`` below ``folder``, gathers the
    peak position and total of every selected raw series, runs ``pred`` on
    the ``probs`` sub-folder and forwards everything to ``save_results``.

    Args:
        folder: Fold directory containing ``train.dat``, ``ksc/`` and
            ``probs/``.
        tseries_fpath: Raw time-series file; the first whitespace-separated
            token of each line is skipped and the rest parsed as integers.
        min_pts: Forwarded to ``pred`` and ``save_results``.
        thetas: Forwarded to ``pred`` and ``save_results``.
        out_folder: Output directory; created if it does not exist.

    Raises:
        OSError: If ``out_folder`` cannot be created and is not already a
            directory.
    """
    try:
        os.makedirs(out_folder)
    except OSError:
        # An already existing directory is fine; anything else (permission
        # problems, a regular file in the way, ...) must not be hidden.
        if not os.path.isdir(out_folder):
            raise

    train_fpath = os.path.join(folder, "train.dat")
    assign_fpath = os.path.join(folder, "ksc", "assign.dat")
    probs_folder = os.path.join(folder, "probs")

    X = ioutil.load_series(tseries_fpath, train_fpath)
    train_idx = np.loadtxt(train_fpath, dtype="bool")
    y_true = np.loadtxt(assign_fpath)

    num_series = X.shape[0]
    max_pts = X.shape[1]

    # Peak day and total are taken from the raw file rather than from X
    # (original note: X has its first 100 entries pruned -- confirm against
    # ioutil.load_series).
    peak_days = []
    sum_views = []
    with open(tseries_fpath) as tseries_file:
        for i, line in enumerate(tseries_file):
            # train_idx is indexed by line number, so it needs one flag per
            # line of the time-series file.
            if train_idx[i]:
                x = np.array([int(v) for v in line.split()[1:]])
                peak_days.append(x.argmax())
                sum_views.append(x.sum())

    peak_days = np.array(peak_days)
    sum_views = np.array(sum_views)

    y_pred, best_by, confs, all_confs = pred(
        probs_folder, num_series, max_pts, min_pts, thetas)
    save_results(X, peak_days, sum_views, min_pts, thetas, best_by, all_confs,
                 y_true, y_pred, confs, out_folder)
Ejemplo n.º 4
0
def run_fold(folder, tseries_fpath, min_pts, thetas, out_folder, gamma_max):
    """Evaluate the test split of one fold and save the results.

    Reads ``test.dat`` and ``ksc/test_assign.dat`` below ``folder``, gathers
    the peak position and total of every selected raw series, runs ``pred``
    on the ``probs-test`` sub-folder and forwards everything to
    ``save_results``.

    Args:
        folder: Fold directory containing ``test.dat``, ``ksc/`` and
            ``probs-test/``.
        tseries_fpath: Raw time-series file; the first whitespace-separated
            token of each line is skipped and the rest parsed as integers.
        min_pts: Forwarded to ``pred`` and ``save_results``.
        thetas: Forwarded to ``pred`` and ``save_results``.
        out_folder: Output directory; created if it does not exist.
        gamma_max: Passed to ``pred`` as its ``max_pts`` argument, used
            as-is (no conversion is done here).

    Raises:
        OSError: If ``out_folder`` cannot be created and is not already a
            directory.
    """
    try:
        os.makedirs(out_folder)
    except OSError:
        # Tolerate only "directory already exists"; surface real failures.
        if not os.path.isdir(out_folder):
            raise

    test_fpath = os.path.join(folder, 'test.dat')
    assign_fpath = os.path.join(folder, 'ksc', 'test_assign.dat')
    probs_folder = os.path.join(folder, 'probs-test')

    X = ioutil.load_series(tseries_fpath, test_fpath)
    test_idx = np.loadtxt(test_fpath, dtype='bool')
    y_true = np.loadtxt(assign_fpath)

    num_series = X.shape[0]
    max_pts = gamma_max

    peak_days = []
    sum_views = []
    with open(tseries_fpath) as tseries_file:
        for i, line in enumerate(tseries_file):
            # test_idx is indexed by line number, so it needs one flag per
            # line of the time-series file.
            if test_idx[i]:
                x = np.array([int(v) for v in line.split()[1:]])
                peak_days.append(x.argmax())
                sum_views.append(x.sum())

    peak_days = np.array(peak_days)
    sum_views = np.array(sum_views)

    y_pred, best_by, confs, all_confs = \
            pred(probs_folder, num_series, max_pts, min_pts, thetas)
    save_results(X, peak_days, sum_views, min_pts, thetas, best_by, all_confs,
                 y_true, y_pred, confs, out_folder)
Ejemplo n.º 5
0
def run_fold(folder, tseries_fpath, min_pts, thetas, out_folder, gamma_max):
    """Score one fold's test series and hand the outcome to ``save_results``.

    Inputs are taken from ``folder``: ``test.dat`` (row selector),
    ``ksc/test_assign.dat`` (reference labels) and ``probs-test`` (read by
    ``pred``).

    Args:
        folder: Directory of the fold being processed.
        tseries_fpath: Raw time-series file. Each line is split on
            whitespace; the first token is dropped and the remaining tokens
            are parsed as integers.
        min_pts: Passed through to ``pred`` and ``save_results``.
        thetas: Passed through to ``pred`` and ``save_results``.
        out_folder: Where results go; created when missing.
        gamma_max: Used unchanged as the ``max_pts`` argument of ``pred``.

    Raises:
        OSError: When ``out_folder`` does not exist and cannot be created.
    """
    try:
        os.makedirs(out_folder)
    except OSError:
        # Creation failing is acceptable only because the directory is
        # already there; every other error is re-raised.
        if not os.path.isdir(out_folder):
            raise

    test_fpath = os.path.join(folder, 'test.dat')
    assign_fpath = os.path.join(folder, 'ksc', 'test_assign.dat')
    probs_folder = os.path.join(folder, 'probs-test')

    X = ioutil.load_series(tseries_fpath, test_fpath)
    test_idx = np.loadtxt(test_fpath, dtype='bool')
    y_true = np.loadtxt(assign_fpath)

    num_series = X.shape[0]
    max_pts = gamma_max

    # Per-series peak index and total, computed from the raw file for the
    # rows flagged in test_idx (one flag per file line is required).
    peak_days = []
    sum_views = []
    with open(tseries_fpath) as tseries_file:
        for i, line in enumerate(tseries_file):
            if test_idx[i]:
                x = np.array([int(v) for v in line.split()[1:]])
                peak_days.append(x.argmax())
                sum_views.append(x.sum())

    peak_days = np.array(peak_days)
    sum_views = np.array(sum_views)

    y_pred, best_by, confs, all_confs = \
            pred(probs_folder, num_series, max_pts, min_pts, thetas)
    save_results(X, peak_days, sum_views, min_pts, thetas, best_by, all_confs,
                 y_true, y_pred, confs, out_folder)
Ejemplo n.º 6
0
def main(tseries_fpath, test_fpath, cents_fpath):
    """Print, one per line, the arg-min centroid index for every loaded series.

    Args:
        tseries_fpath: Time-series file passed to ``ioutil.load_series``.
        test_fpath: Second argument to ``ioutil.load_series`` (presumably
            selects the test series -- confirm).
        cents_fpath: Text file with the centroids, read by ``np.loadtxt``.
    """
    X = ioutil.load_series(tseries_fpath, test_fpath)

    C = np.loadtxt(cents_fpath)
    # Only the first element of dist_all's result is used; the minimum is
    # taken along axis 0 (presumably the centroid axis -- confirm against
    # dist.dist_all).
    dist_cents = dist.dist_all(C, X, rolling=True)[0]
    y_true = dist_cents.argmin(axis=0)

    for t in y_true:
        # Call form with a single argument prints the same text on
        # Python 2 and Python 3; the statement form only parses on Python 2.
        print(t)
Ejemplo n.º 7
0
def main(tseries_fpath, base_folder, k):
    """Cluster the training series with ``ksc.inc_ksc`` and save its outputs.

    Args:
        tseries_fpath: Time-series file passed to ``ioutil.load_series``.
        base_folder: Folder that receives the result files; ``train.dat``
            is read from its parent directory.
        k: Number passed to ``ksc.inc_ksc`` after conversion with ``int``.
    """
    num_clusters = int(k)

    train_idx_fpath = os.path.join(base_folder, "..", "train.dat")
    series = ioutil.load_series(tseries_fpath, train_idx_fpath)

    centroids, assignments, shifts, cent_dists = ksc.inc_ksc(series, num_clusters)

    # (file name, data, savetxt format) -- written in this order.
    outputs = (
        ("cents.dat", centroids, "%.5f"),
        ("assign.dat", assignments, "%d"),
        ("shift.dat", shifts, "%d"),
        ("dists_cent.dat", cent_dists, "%.5f"),
    )
    for fname, data, fmt in outputs:
        np.savetxt(os.path.join(base_folder, fname), data, fmt=fmt)
Ejemplo n.º 8
0
def main(tseries_fpath, base_folder, k):
    """Run ``ksc.inc_ksc`` on the training series and write four result files.

    Args:
        tseries_fpath: Time-series file passed to ``ioutil.load_series``.
        base_folder: Destination folder for ``cents.dat``, ``assign.dat``,
            ``shift.dat`` and ``dists_cent.dat``. The training selector is
            read from ``<base_folder>/../train.dat``.
        k: Converted with ``int`` and passed to ``ksc.inc_ksc``.
    """
    def _save(fname, values, fmt):
        # All outputs live directly inside base_folder.
        np.savetxt(os.path.join(base_folder, fname), values, fmt=fmt)

    n_clusters = int(k)
    parent_train_fpath = os.path.join(base_folder, '..', 'train.dat')
    X = ioutil.load_series(tseries_fpath, parent_train_fpath)

    result = ksc.inc_ksc(X, n_clusters)
    cent, assign, shift, dists_cent = result

    _save('cents.dat', cent, '%.5f')
    _save('assign.dat', assign, '%d')
    _save('shift.dat', shift, '%d')
    _save('dists_cent.dat', dists_cent, '%.5f')
Ejemplo n.º 9
0
def main(tseries_fpath, centroids_fpath, test_fpath, assign_fpath, out_folder):
    """Run ``fit`` with the centroids as training data for every ``num_pts``.

    ``num_pts`` runs from 1 up to the second dimension of the loaded test
    array; each result is saved as ``probs-<num_pts>-pts.dat``.

    Args:
        tseries_fpath: Time-series file passed to ``ioutil.load_series``.
        centroids_fpath: Text file with the centroids (``np.genfromtxt``).
        test_fpath: Second argument to ``ioutil.load_series``.
        assign_fpath: Not used by this function.
        out_folder: Existing folder that receives the output files.
    """
    centroids = np.genfromtxt(centroids_fpath)
    test_series = ioutil.load_series(tseries_fpath, test_fpath)

    # Each centroid is its own class: labels are simply 0..n_centroids-1.
    centroid_labels = np.arange(centroids.shape[0])

    series_len = test_series.shape[1]
    for prefix_len in range(1, series_len + 1):
        fname = 'probs-%d-pts.dat' % prefix_len
        result = fit(centroids, centroid_labels, test_series, prefix_len)
        np.savetxt(os.path.join(out_folder, fname), result)
Ejemplo n.º 10
0
def main(tseries_fpath, train_fpath, test_fpath, ytrain_fpath, ytest_fpath, out_folder):
    """Classify the test series for every ``num_pts`` and record first successes.

    For ``num_pts`` from 1 to the second dimension of the training array,
    ``fit`` is run and, per test series, the value ``probs[i, true label]``
    is stored. The first ``num_pts`` at which a series is predicted
    correctly with that value above ``1 / number of training classes`` is
    remembered together with the value itself.

    Files written to ``out_folder`` (which must exist):
        ``class_summ-<n>-pts.dat``  classification report per ``num_pts``
        ``probs-<n>-pts.dat``       ``probs`` as returned by ``fit``
        ``best-by.dat``             first successful ``num_pts`` (0 = never)
        ``conf.dat``                value at that first success
        ``all-conf.dat``            value for every series and ``num_pts``

    Args:
        tseries_fpath: Time-series file passed to ``ioutil.load_series``.
        train_fpath: Second argument to ``ioutil.load_series`` (training).
        test_fpath: Second argument to ``ioutil.load_series`` (test).
        ytrain_fpath: Text file with the training labels.
        ytest_fpath: Text file with the true test labels; they are used as
            column indices into ``probs``, so they must be integral values.
        out_folder: Destination folder.
    """
    Xtrain = ioutil.load_series(tseries_fpath, train_fpath)
    Xtest = ioutil.load_series(tseries_fpath, test_fpath)

    y_train = np.genfromtxt(ytrain_fpath)
    y_true = np.genfromtxt(ytest_fpath)
    # genfromtxt yields floats by default and NumPy refuses float indices,
    # so keep an integer copy purely for column look-ups. y_true itself is
    # left untouched so comparisons and the report behave as before.
    true_cols = y_true.astype(int)
    max_pts = Xtrain.shape[1]
    num_test = Xtest.shape[0]

    best_by = np.zeros(num_test)
    min_conf = np.zeros(num_test)
    all_probs = np.zeros(shape=(num_test, max_pts))

    # If confidence is equal to this, the classifier did nothing.
    lousy_conf = 1.0 / len(set(y_train))
    for num_pts in range(1, max_pts + 1):
        y_pred, probs = fit(Xtrain, y_train, Xtest, num_pts)

        for i in range(num_test):
            p_true = probs[i, true_cols[i]]
            # best_by == 0 means "no success recorded yet" for series i.
            if best_by[i] == 0 and y_pred[i] == y_true[i] and p_true > lousy_conf:
                best_by[i] = num_pts
                min_conf[i] = p_true
            all_probs[i, num_pts - 1] = p_true

        summary_fpath = os.path.join(out_folder,
                                     'class_summ-%d-pts.dat' % num_pts)
        probs_fpath = os.path.join(out_folder, 'probs-%d-pts.dat' % num_pts)

        with open(summary_fpath, 'w') as summary_file:
            print(classification_report(y_true, y_pred), file=summary_file)
        np.savetxt(probs_fpath, probs)

    best_fpath = os.path.join(out_folder, 'best-by.dat')
    conf_fpath = os.path.join(out_folder, 'conf.dat')
    all_conf_fpath = os.path.join(out_folder, 'all-conf.dat')

    np.savetxt(best_fpath, best_by)
    np.savetxt(conf_fpath, min_conf)
    np.savetxt(all_conf_fpath, all_probs)
Ejemplo n.º 11
0
def main(tseries_fpath, train_fpath, test_fpath, ytrain_fpath, ytest_fpath, out_folder):
    """Run ``fit`` for each ``num_pts`` and track when every test series is first right.

    ``num_pts`` ranges from 1 to the second dimension of the training array.
    A series counts as successful at the first ``num_pts`` where its
    prediction equals the true label and ``probs[i, true label]`` exceeds
    ``1 / number of distinct training labels``.

    Files written to ``out_folder`` (which must exist):
        ``class_summ-<n>-pts.dat``  classification report per ``num_pts``
        ``probs-<n>-pts.dat``       ``probs`` as returned by ``fit``
        ``best-by.dat``             first successful ``num_pts`` (0 = never)
        ``conf.dat``                ``probs[i, true label]`` at that point
        ``all-conf.dat``            ``probs[i, true label]`` for all ``num_pts``

    Args:
        tseries_fpath: Time-series file passed to ``ioutil.load_series``.
        train_fpath: Second argument to ``ioutil.load_series`` (training).
        test_fpath: Second argument to ``ioutil.load_series`` (test).
        ytrain_fpath: Text file with the training labels.
        ytest_fpath: Text file with the true test labels; used as column
            indices into ``probs``, so the values must be integral.
        out_folder: Destination folder.
    """
    Xtrain = ioutil.load_series(tseries_fpath, train_fpath)
    Xtest = ioutil.load_series(tseries_fpath, test_fpath)

    y_train = np.genfromtxt(ytrain_fpath)
    y_true = np.genfromtxt(ytest_fpath)
    # Labels are parsed as floats; indexing needs integers, so an integer
    # view is kept for look-ups while y_true stays as loaded.
    label_idx = y_true.astype(int)
    max_pts = Xtrain.shape[1]
    n_test = Xtest.shape[0]

    best_by = np.zeros(n_test)
    min_conf = np.zeros(n_test)
    all_probs = np.zeros(shape=(n_test, max_pts))

    lousy_conf = 1.0 / len(set(y_train))  # if confidence is equal to this, classifier did nothing
    for num_pts in range(1, max_pts + 1):
        y_pred, probs = fit(Xtrain, y_train, Xtest, num_pts)

        # range instead of xrange: xrange does not exist on Python 3.
        for i in range(n_test):
            p_true = probs[i, label_idx[i]]
            if best_by[i] == 0 and y_pred[i] == y_true[i] and p_true > lousy_conf:
                best_by[i] = num_pts
                min_conf[i] = p_true
            all_probs[i, num_pts - 1] = p_true

        summary_fpath = os.path.join(out_folder, "class_summ-%d-pts.dat" % num_pts)
        probs_fpath = os.path.join(out_folder, "probs-%d-pts.dat" % num_pts)

        with open(summary_fpath, "w") as summary_file:
            print(classification_report(y_true, y_pred), file=summary_file)
        np.savetxt(probs_fpath, probs)

    best_fpath = os.path.join(out_folder, "best-by.dat")
    conf_fpath = os.path.join(out_folder, "conf.dat")
    all_conf_fpath = os.path.join(out_folder, "all-conf.dat")

    np.savetxt(best_fpath, best_by)
    np.savetxt(conf_fpath, min_conf)
    np.savetxt(all_conf_fpath, all_probs)
Ejemplo n.º 12
0
def main(tseries_fpath, centroids_fpath, test_fpath, assign_fpath, out_folder,
         gamma_max):
    """Run ``fit`` with the centroids as training data for 1..``gamma_max`` points.

    One ``probs-<num_pts>-pts.dat`` file is written to ``out_folder`` per
    value of ``num_pts``.

    Args:
        tseries_fpath: Time-series file passed to ``ioutil.load_series``.
        centroids_fpath: Text file with the centroids (``np.genfromtxt``).
        test_fpath: Second argument to ``ioutil.load_series``.
        assign_fpath: Not used by this function.
        out_folder: Existing folder that receives the output files.
        gamma_max: Largest ``num_pts`` to evaluate; converted with ``int``.
    """
    upper = int(gamma_max)

    centroids = np.genfromtxt(centroids_fpath)
    test_series = ioutil.load_series(tseries_fpath, test_fpath)

    # One label per centroid row: 0, 1, ..., n_centroids - 1.
    labels = np.arange(centroids.shape[0])

    for n in range(1, upper + 1):
        out_name = 'probs-%d-pts.dat' % n
        scores = fit(centroids, labels, test_series, n)
        np.savetxt(os.path.join(out_folder, out_name), scores)
Ejemplo n.º 13
0
def main(tseries_fpath, train_fpath, centroids_fpath, classes_fpath,
         out_folder, gamma_max):
    """Classify series against the centroids for 1..``gamma_max`` points.

    For each ``num_pts``, ``fit`` is run with the centroids as training data
    (labels ``0..n_centroids-1``). Per series the value
    ``probs[i, true class]`` is stored, and the first ``num_pts`` at which
    the prediction is correct with that value above ``1 / n_centroids`` is
    remembered.

    Files written to ``out_folder`` (which must exist):
        ``class_summ-<n>-pts.dat``  classification report per ``num_pts``
        ``probs-<n>-pts.dat``       ``probs`` as returned by ``fit``
        ``best-by.dat``             first successful ``num_pts`` (0 = never)
        ``conf.dat``                value at that first success
        ``all-conf.dat``            value for every series and ``num_pts``

    Args:
        tseries_fpath: Time-series file passed to ``ioutil.load_series``.
        train_fpath: Second argument to ``ioutil.load_series``.
        centroids_fpath: Text file with the centroids.
        classes_fpath: Text file with the true class of each series, read
            as integers.
        out_folder: Destination folder.
        gamma_max: Largest ``num_pts`` to evaluate; converted with ``int``.
    """
    gamma_max = int(gamma_max)

    X = ioutil.load_series(tseries_fpath, train_fpath)
    C = np.genfromtxt(centroids_fpath, dtype='f')

    y_train = np.arange(C.shape[0])
    y_true = np.genfromtxt(classes_fpath, dtype='i')
    max_pts = gamma_max
    num_series = X.shape[0]

    best_by = np.zeros(num_series)
    min_conf = np.zeros(num_series)
    all_probs = np.zeros(shape=(num_series, max_pts))

    # If confidence is equal to this, the classifier did nothing.
    lousy_conf = 1.0 / C.shape[0]
    for num_pts in range(1, max_pts + 1):
        y_pred, probs = fit(C, y_train, X, y_true, num_pts)

        # range instead of xrange: the block already relies on the print
        # function, and xrange does not exist on Python 3.
        for i in range(num_series):
            p_true = probs[i, y_true[i]]
            # best_by == 0 means "no success recorded yet" for series i.
            if (best_by[i] == 0 and y_pred[i] == y_true[i]
                    and p_true > lousy_conf):
                best_by[i] = num_pts
                min_conf[i] = p_true
            all_probs[i, num_pts - 1] = p_true

        summary_fpath = os.path.join(out_folder,
                                     'class_summ-%d-pts.dat' % num_pts)
        probs_fpath = os.path.join(out_folder, 'probs-%d-pts.dat' % num_pts)

        with open(summary_fpath, 'w') as summary_file:
            print(classification_report(y_true, y_pred), file=summary_file)
        np.savetxt(probs_fpath, probs)

    best_fpath = os.path.join(out_folder, 'best-by.dat')
    conf_fpath = os.path.join(out_folder, 'conf.dat')
    all_conf_fpath = os.path.join(out_folder, 'all-conf.dat')

    np.savetxt(best_fpath, best_by)
    np.savetxt(conf_fpath, min_conf)
    np.savetxt(all_conf_fpath, all_probs)
Ejemplo n.º 14
0
def main(tseries_fpath, train_fpath, centroids_fpath, classes_fpath, out_folder,
         gamma_max):
    """Run centroid-based ``fit`` for 1..``gamma_max`` points and log first successes.

    The centroids act as training data with labels ``0..n_centroids-1``.
    For each series and each ``num_pts`` the value ``probs[i, true class]``
    is kept; the first ``num_pts`` giving a correct prediction with that
    value above ``1 / n_centroids`` is stored as well.

    Files written to ``out_folder`` (which must exist):
        ``class_summ-<n>-pts.dat``  classification report per ``num_pts``
        ``probs-<n>-pts.dat``       ``probs`` as returned by ``fit``
        ``best-by.dat``             first successful ``num_pts`` (0 = never)
        ``conf.dat``                value at that first success
        ``all-conf.dat``            value for every series and ``num_pts``

    Args:
        tseries_fpath: Time-series file passed to ``ioutil.load_series``.
        train_fpath: Second argument to ``ioutil.load_series``.
        centroids_fpath: Text file with the centroids.
        classes_fpath: Text file with the true class of each series; the
            values are used as column indices into ``probs`` and therefore
            must be integral.
        out_folder: Destination folder.
        gamma_max: Largest ``num_pts`` to evaluate; converted with ``int``.
    """
    gamma_max = int(gamma_max)

    X = ioutil.load_series(tseries_fpath, train_fpath)
    C = np.genfromtxt(centroids_fpath, dtype='f')

    y_train = np.arange(C.shape[0])
    y_true = np.genfromtxt(classes_fpath)
    # The classes are parsed as floats; NumPy rejects float indices, so an
    # integer copy is used for look-ups. y_true itself is passed on to fit
    # and the report exactly as loaded.
    class_idx = y_true.astype(int)
    max_pts = gamma_max
    n_series = X.shape[0]

    best_by = np.zeros(n_series)
    min_conf = np.zeros(n_series)
    all_probs = np.zeros(shape=(n_series, max_pts))

    lousy_conf = 1.0 / C.shape[0]  # if confidence is equal to this, classifier did nothing
    for num_pts in range(1, max_pts + 1):
        y_pred, probs = fit(C, y_train, X, y_true, num_pts)

        # range instead of xrange: xrange does not exist on Python 3.
        for i in range(n_series):
            p_true = probs[i, class_idx[i]]
            if best_by[i] == 0 and y_pred[i] == y_true[i] and p_true > lousy_conf:
                best_by[i] = num_pts
                min_conf[i] = p_true
            all_probs[i, num_pts - 1] = p_true

        summary_fpath = os.path.join(out_folder,
                                     'class_summ-%d-pts.dat' % num_pts)
        probs_fpath = os.path.join(out_folder, 'probs-%d-pts.dat' % num_pts)

        with open(summary_fpath, 'w') as summary_file:
            print(classification_report(y_true, y_pred), file=summary_file)
        np.savetxt(probs_fpath, probs)

    best_fpath = os.path.join(out_folder, 'best-by.dat')
    conf_fpath = os.path.join(out_folder, 'conf.dat')
    all_conf_fpath = os.path.join(out_folder, 'all-conf.dat')

    np.savetxt(best_fpath, best_by)
    np.savetxt(conf_fpath, min_conf)
    np.savetxt(all_conf_fpath, all_probs)