Example #1
0
def main():

    # load data
    training_data = load_data.read_data("train.csv")
    testing_data = load_data.read_data("test.csv")
    testing_labels = load_data.read_data("submission.csv")
    X_train, X_test = load_data.vectorize_data(training_data, testing_data)

    Y_train = np.array(training_data)[:, -1]
    Y_test = np.array(testing_labels)[:, -1]

    svm(X_train, Y_train, X_test, Y_test)
Example #2
0
def input_data(file_path):

    sentences, tags = read_data(file_path)
    print("sentences length: %s " % len(sentences))
    print("last sentence: ", sentences[-1])

    # ALBERT ENCODING
    print("start ALBERT encoding")
    x = np.array([f(sent) for sent in sentences])
    print("end ALBERT encoding")

    # pad or truncate each y sequence to MAX_SEQ_LEN
    new_y = []
    for seq in tags:
        num_tag = [label_id_dict[_] for _ in seq]
        if len(seq) < MAX_SEQ_LEN:
            num_tag = num_tag + [0] * (MAX_SEQ_LEN - len(seq))
        else:
            num_tag = num_tag[:MAX_SEQ_LEN]

        new_y.append(num_tag)

    # encode the elements of y as one-hot vectors
    y = np.empty(shape=(len(tags), MAX_SEQ_LEN, len(label_id_dict.keys()) + 1))

    for i, seq in enumerate(new_y):
        y[i, :, :] = to_categorical(seq,
                                    num_classes=len(label_id_dict.keys()) + 1)

    return x, y
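For reference, a minimal standalone sketch of the pad-and-one-hot step used above, with a toy label_id_dict and MAX_SEQ_LEN; all names are illustrative and the to_categorical import path is assumed.

import numpy as np
from keras.utils import to_categorical

label_id_dict = {"B-PER": 1, "I-PER": 2, "O": 3}   # hypothetical tag-to-id mapping
MAX_SEQ_LEN = 5

seq = ["B-PER", "I-PER", "O"]                       # one tag sequence
num_tag = [label_id_dict[t] for t in seq]
num_tag = num_tag + [0] * (MAX_SEQ_LEN - len(num_tag))   # pad with 0 (the extra class)
one_hot = to_categorical(num_tag, num_classes=len(label_id_dict) + 1)
print(one_hot.shape)                                # (5, 4)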
Example #3
0
def train_model():
    # read the training, validation, and test data
    train_x, train_y = input_data(train_file_path)
    train_x, test_x, train_y, test_y = train_test_split(train_x,
                                                        train_y,
                                                        test_size=0.2)
    #
    # dev_x, dev_y = input_data(dev_file_path)
    # test_x, test_y = input_data(test_file_path)

    # model training
    model = build_model(MAX_SEQ_LEN, len(label_id_dict.keys()) + 1)
    history = model.fit(train_x,
                        train_y,
                        validation_data=(test_x, test_y),
                        batch_size=16,
                        epochs=5)

    model.save("%s_ner.h5" % event_type)

    # plot the loss and accuracy curves
    plt.subplot(2, 1, 1)
    epochs = len(history.history['loss'])
    plt.plot(range(epochs), history.history['loss'], label='loss')
    plt.plot(range(epochs), history.history['val_loss'], label='val_loss')
    plt.legend()

    plt.subplot(2, 1, 2)
    epochs = len(history.history['crf_viterbi_accuracy'])
    plt.plot(range(epochs),
             history.history['crf_viterbi_accuracy'],
             label='crf_viterbi_accuracy')
    plt.plot(range(epochs),
             history.history['val_crf_viterbi_accuracy'],
             label='val_crf_viterbi_accuracy')
    plt.legend()
    plt.savefig("%s_loss_acc.png" % event_type)

    # model performance on the test set
    # predict labels
    y = np.argmax(model.predict(test_x), axis=2)
    pred_tags = []
    for i in range(y.shape[0]):
        pred_tags.append([id_label_dict[_] for _ in y[i] if _])

    # the predicted tag sequence may not match the length of the original annotation, so adjust the predictions
    test_sents, test_tags = read_data(test_file_path)
    final_tags = []
    for test_tag, pred_tag in zip(test_tags, pred_tags):
        if len(test_tag) == len(pred_tag):
            final_tags.append(pred_tag)
        elif len(test_tag) < len(pred_tag):
            final_tags.append(pred_tag[:len(test_tag)])
        else:
            final_tags.append(pred_tag + ['O'] *
                              (len(test_tag) - len(pred_tag)))

    # evaluate the test set with seqeval
    print(classification_report(test_tags, final_tags, digits=4))
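As a side note, a tiny self-contained illustration of the seqeval call above, using toy tag sequences rather than the project's data:

from seqeval.metrics import classification_report

y_true = [["B-PER", "I-PER", "O"], ["B-LOC", "O"]]
y_pred = [["B-PER", "O", "O"], ["B-LOC", "O"]]
print(classification_report(y_true, y_pred, digits=4))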
Example #4
0
def main():

    # load data
    training_data = load_data.read_data("train.csv")
    testing_data = load_data.read_data("test.csv")
    testing_labels = load_data.read_data("submission.csv")
    X_train, X_test = load_data.vectorize_data(training_data, testing_data)

    Y_train = np.array(training_data)[:, -1]
    Y_test = np.array(testing_labels)[:, -1]

    # uncomment for grid searching
    #params = grid_search_kmeans(X_train, Y_train)

    params = {'n_clusters': 2}

    kmeans(X_train, Y_train, X_test, Y_test, params)
Example #5
0
def main():
    # load data
    training_data = load_data.read_data("train.csv")
    testing_data = load_data.read_data("test.csv")
    testing_labels = load_data.read_data("submission.csv")
    X_train, X_test = load_data.vectorize_data(training_data, testing_data)

    Y_train = np.array(training_data)[:, -1]
    Y_test = np.array(testing_labels)[:, -1]

    print(X_train.shape)
    print(X_test.shape)

    X_train = X_train.toarray()
    X_test = X_test.toarray()

    # reduce data
    X_train, X_test = fld(X_train, Y_train, X_test, 2)
Example #6
0
def main():
    # load data
    training_data = load_data.read_data("train.csv")
    testing_data = load_data.read_data("test.csv")
    testing_labels = load_data.read_data("submission.csv")
    X_train, X_test = load_data.vectorize_data(training_data, testing_data)

    Y_train = np.array(training_data)[:, -1]
    Y_test = np.array(testing_labels)[:, -1]

    print(X_train.shape)
    print(X_test.shape)

    X_train = X_train.toarray()
    X_test = X_test.toarray()

    #
    # means = np.mean(X_train.T, axis=1)
    # # center columns
    # cols = X_train - means
    # # print(cols)
    #
    # # cov matrix
    # cov = np.cov(cols.T)
    #
    # # calculate dims needed to be kept based on error rate
    # values, vectors = np.linalg.eig(cov)
    # dim = pca_error_rate(values, 0.2)
    # print("Reduced DIMS to: " + str(dim) + " from " + str(len(training_data[0])))

    # reduce data
    X_train, X_test = pca(X_train, X_test, 2952)

    print(X_train.shape)
    print(X_test.shape)

    params = {
        'activation': 'relu',
        'solver': 'lbfgs',
        'hidden_layer_sizes': (100, 10),
        'learning_rate_init': 0.0009
    }

    bpnn.bpnn(X_train, Y_train, X_test, Y_test, params)
Example #7
0
def main():
    computer = 'laptop'
    #computer = 'TS'

    if computer == 'laptop':
        data_file = 'C:/local/sandp500/sp470.csv'
        var_filename = 'C:/GoogleDrivePushpakUW/UW/6thYear/CSE546/Project/return_adj.pkl'
    else:
        data_file = 'H:/local/sandp500/sp470.csv'
        var_filename = 'H:/CSE546/Project/return_adj.pkl'

    ret_data, vol_data, comp_list = load_data.read_data()

    ################################################
    # Doing it on a small sample of ten firms
    # ret_data = ret_data.iloc[0:100 ,0:50]
    # print(ret_data.shape)
    # print("Return data: \n", ret_data)

    col_names = ret_data.columns.tolist()
    #print("Column names: \n", col_names)

    ################################################
    lagged_ret = ret_data.shift(1).dropna()
    num_obs = lagged_ret.shape[0]
    num_firms = lagged_ret.shape[1]

    # Aligning indices of X and y ( each y is a column in Y)
    Y = ret_data[ret_data.index.isin(lagged_ret.index)]

    train_size = int(0.8 * num_obs)
    # print("Num of obs in train set: ", train_size)

    train_index = np.random.choice(num_obs, train_size, replace=False)
    test_index = np.setdiff1d(np.arange(num_obs), train_index)

    # convert pandas dataframe to numpy array
    lagged_ret = lagged_ret.values
    Y = Y.values

    ##############################################################################
    # Do multiprocessing
    #import multiprocessing
    #num_cores = multiprocessing.cpu_count()
    #print("How many cores: ", num_cores)   # num of cores = 4

    inputs = np.arange(num_firms)

    with ProcessPoolExecutor(max_workers=2) as executor:
        res = executor.map(est_coefs, inputs)

    #print("Results: \n" + str(res))
    return res
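executor.map returns a lazy iterator; a hedged, self-contained sketch of materializing the results into a list before leaving the pool (square stands in for est_coefs, which must be a picklable module-level function):

from concurrent.futures import ProcessPoolExecutor
import numpy as np

def square(i):                      # stand-in for est_coefs
    return i * i

if __name__ == '__main__':
    inputs = np.arange(8)
    with ProcessPoolExecutor(max_workers=2) as executor:
        res = list(executor.map(square, inputs))
    print(res)                      # [0, 1, 4, 9, 16, 25, 36, 49]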
Example #8
0
def train(epoch):
    global epoch_start
    epoch_start = time()
    batch_size = int(len(train_list_all) / BATCH_SIZE)  #25000 / 6
    i_global = 0

    for s in range(batch_size):
        train_x_1, train_x_2, train_y_ = read_data(train_list_all, s)

        start_time = time()
        i_global, _, batch_loss, batch_acc, yy, yyy, gt = sess.run(
            [global_step, optimizer, cost, accuracy, ya, y, y_],
            feed_dict={
                x_1: train_x_1,
                x_2: train_x_2,
                y_: train_y_,
                learning_rate: lr(epoch),
                phase: True,
                dr_rate: 1
            })
        duration = time() - start_time

        if s % 10 == 0:
            percentage = int(round((s / batch_size) * 100))

            bar_len = 29
            filled_len = int((bar_len * int(percentage)) / 100)
            bar = '=' * filled_len + '>' + '-' * (bar_len - filled_len)

            msg = "Global step: {:>5} - [{}] {:>3}% - acc: {:.4f} - loss: {:.4f} - {:.1f} sample/sec - lr: {:.8f}"
            print(
                msg.format(i_global, bar, percentage, batch_acc, batch_loss,
                           BATCH_SIZE / duration, lr(epoch)))
            #print("saving training result.. :" + SAVE_PATH + '/train/epoch_' + str(epoch)+'_s_'+str(s) + '.bmp')

            thresholded = []
            for yy_row in yy[0, :, :, 0]:
                thresholded.append(
                    [255 if yyy > THRESHOLD else 0 for yyy in yy_row])

            cv2.imwrite(
                SAVE_PATH + '/train/epoch_' + str(epoch) + '_s_' + str(s) +
                '_thresholded.bmp', np.uint8(thresholded))
            cv2.imwrite(
                SAVE_PATH + '/train/epoch_' + str(epoch) + '_s_' + str(s) +
                '_grayscale.bmp', np.uint8(yy[0, :, :, 0] * 255))
            cv2.imwrite(
                SAVE_PATH + '/train/epoch_' + str(epoch) + '_s_' + str(s) +
                '_groundtruth.bmp', np.uint8(gt[0] * 255))
            del thresholded

    test_and_save(i_global, epoch)
Example #9
0
def main():

    # load data
    training_data = load_data.read_data("train.csv")
    testing_data = load_data.read_data("test.csv")
    testing_labels = load_data.read_data("submission.csv")
    X_train, X_test = load_data.vectorize_data(training_data, testing_data)

    Y_train = np.array(training_data)[:, -1]
    Y_test = np.array(testing_labels)[:, -1]

    # uncomment for grid searching
    #params = grid_search_bpnn(X_train, Y_train)

    params = {
        'activation': 'relu',
        'solver': 'lbfgs',
        'hidden_layer_sizes': (100, 10),
        'learning_rate_init': 0.0009
    }

    bpnn(X_train, Y_train, X_test, Y_test, params)
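A hedged guess at how the params dict above might be consumed, assuming bpnn wraps scikit-learn's MLPClassifier (the wrapper itself is not shown in these examples):

from sklearn.neural_network import MLPClassifier

def bpnn_sketch(X_train, Y_train, X_test, Y_test, params):
    # all four keys above are valid MLPClassifier keyword arguments
    clf = MLPClassifier(**params)
    clf.fit(X_train, Y_train)
    return clf.score(X_test, Y_test)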
Example #10
0
def process_labels(label_list, data_file):
    entity_list, token_list, _ = load_data.read_data(data_file)
    label_res_list = []
    assert len(entity_list) == len(token_list) == len(label_list)
    for idx in range(len(label_list)):
        label_res_list.append(
            get_tag_and_index(label_list[idx], token_list[idx]))
    for idx in range(len(label_res_list)):
        curr_entity = "".join(entity_list[idx])
        label_res_list[idx] = [
            val for val in label_res_list[idx]
            if "".join(val[-1]) != curr_entity
        ]

    return label_res_list
Example #11
0
def analyse_data_single_chained(filename):
    emp_cov = load_data.read_data(
        filename,
        nodes=['French', 'Han', 'Karitiana', 'Sardinian', 'Yoruba'],
        noss=True)
    print emp_cov
    df = 100
    summaries = [
        summary.s_posterior(),
        summary.s_variable('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_number_of_ghost_populations,
            'ghost_pops',
            output='integer'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_max_distance_to_root, 'max_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_min_distance_to_root, 'min_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_average_distance_to_root, 'average_root'),
        summary.s_basic_tree_statistics(
            tree_statistics.unique_identifier_and_branch_lengths,
            'tree',
            output='string'),
        summary.s_basic_tree_statistics(
            tree_statistics.majority_tree, 'majority_tree', output='string'),
        summary.s_variable('proposal_type', output='string'),
        summary.s_variable('sliding_regraft_adap_param'),
        summary.s_variable('rescale_adap_param'),
        summary.s_tree_identifier_new_tree()
    ] + [
        summary.s_variable(s, output='double_missing')
        for s in ['prior', 'branch_prior', 'no_admix_prior', 'top_prior']
    ]
    r = simulation_sanity.test_posterior_model(None,
                                               None,
                                               300000,
                                               summaries=summaries,
                                               thinning_coef=20,
                                               wishart_df=df,
                                               emp_cov=emp_cov,
                                               no_leaves_true_tree=5)
Example #12
0
def load_photos():
    df = load_data.read_data()

    # Get names
    pizza_names, pizza_eng_names = load_data.get_pizza_names(df)
    print(pizza_eng_names)

    # prepare image paths
    image_paths = []
    for name in pizza_eng_names:
        path = os.path.join(name, name + '3.jpg')
        image_paths.append(path)
    print(image_paths)

    images = load_data.load_images(image_paths)

    # cut pizza from photo
    pizza_imgs = load_data.cut_pizza_from_images(images)

    return pizza_eng_names, pizza_imgs
Example #13
0
def ms_to_treemix(filename='tmp.txt', samples_per_pop=20, no_pops=4, n_reps=1, filename2='tmp.treemix_in', treemix_files='tmp'):
    data=[]
    with open(filename, 'r') as f:
        for r in f.readlines():
            #print r[:5]
            data.append(map(int,list(r.rstrip())))
    m= array(data)
    if n_reps>1:#reorder the data so that there are more SNPs instead of more samples/populations
        #print m.shape
        #print 'samples, pops, reps', samples_per_pop, no_pops, n_reps
        m=hstack(vsplit(m, n_reps))
        
    #print samples_per_pop
    sums=tuple([sum(m[(i*samples_per_pop):((i+1)*samples_per_pop), : ], axis=0) for i in xrange(no_pops)])
    #print sums, 'sums'
    with open(filename2, 'w') as f:
        f.write(' '.join(get_trivial_nodes(no_pops))+'\n')
        for s_vec in zip(*sums):
            f.write(' '.join([str(s)+','+str(samples_per_pop-s) for s in s_vec])+'\n')
    filename2_gz=filename2+'.gz'
    subprocess.call(['gzip','-f', filename2])
    return read_data(filename2_gz, blocksize=10000 ,outgroup='s3', noss=True, outfile=treemix_files)
Example #14
0
def input_data(file_path):
    sentences, tags = read_data(file_path)
    print("sentences length: %s " % len(sentences))
    print("last sentence: ", sentences[-1])

    # ALBERT ENCODING
    print("start ALBERT encoding")
    x = []
    pbar = tqdm(sentences)
    for sent in pbar:
        pbar.set_description("Processing bar: ")
        # BERT-encode the sentence first
        x.append(f(sent))

    x = np.array(x)
    print("end ALBERT encoding")

    # pad or truncate each y sequence to MAX_SEQ_LEN
    new_y = []
    for seq in tags:
        num_tag = [label_id_dict[_] for _ in seq]
        if len(seq) < MAX_SEQ_LEN:
            # pad with 0
            num_tag = num_tag + [0] * (MAX_SEQ_LEN - len(seq))
        else:
            num_tag = num_tag[:MAX_SEQ_LEN]

        new_y.append(num_tag)

    # encode the elements of y as one-hot vectors: (num samples, MAX_SEQ_LEN, num classes)
    y = np.empty(shape=(len(tags), MAX_SEQ_LEN, len(label_id_dict.keys()) + 1))

    # to_categorical one-hot encoding
    for i, seq in enumerate(new_y):
        y[i, :, :] = to_categorical(seq,
                                    num_classes=len(label_id_dict.keys()) + 1)

    return x, y
Example #15
0
def predict(request, input_tablename, output_tablename):

    # opt = option.Options()
    # weight_path = opt.weight_path
    weight_path = 'C:/Users/bong/project/semiconductor_project/semiconductor_project/web_server/web/predict/test_model_new.pth'

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    """# 데이터 로드"""
    data_realtime = load_data.read_data(input_tablename,output_tablename)

    """# 모델생성"""
    model = modeling.make_model(device, weight_path)

    """# 두께 예측"""
    data_realtime = data_realtime.iloc[:,1:-1]
    data_realtime_numpy = torch.from_numpy(data_realtime.astype(float).values)
    data_realtime_numpy_de = data_realtime_numpy.to(device)
    outputs = model(data_realtime_numpy_de.float()).cpu().detach().numpy()
    # outputs = model(data_realtime_numpy_de.float()).cpu().detach().numpy().round(-1)
    pred_test = pd.DataFrame(outputs)
    pred_test.columns = ['layer_1', 'layer_2', 'layer_3', 'layer_4']

    return pred_test,request
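For context, a minimal standalone sketch of the inference pattern used above (numpy array to tensor, onto the device, forward pass, back to numpy), with a dummy model in place of modeling.make_model:

import numpy as np
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = torch.nn.Linear(4, 4).to(device)        # stand-in for the real model

batch = np.random.rand(3, 4)                    # stand-in for the data_realtime values
inputs = torch.from_numpy(batch).float().to(device)
outputs = model(inputs).cpu().detach().numpy()
print(outputs.shape)                            # (3, 4)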
Example #16
0
import os
os.system('CLS')

from pylab import plot, show
from numpy import vstack, array
from numpy.random import rand
import numpy as np
from scipy.cluster.vq import kmeans, kmeans2, vq
import pandas as pd
from math import sqrt
from sklearn.cluster import KMeans
from matplotlib import pyplot as plt
import load_data

ret_data, vol_data, volume_dat, comp_list = load_data.read_data()
#print(ret_data.head())
#print(vol_data.head())

returns = ret_data.mean() * 252  # annualized return
vol = vol_data.mean()
volume = volume_dat.mean()
#print(returns)
data = np.asarray([np.asarray(returns), np.asarray(vol), np.asarray(volume)]).T
X = data

max_cluster = 20
incr = 1

distortions = []
for k in range(incr, max_cluster):
    k_means = KMeans(n_clusters=k, random_state=0)
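The snippet above is cut off inside the loop; a hedged sketch of a typical elbow-method completion under the same setup (fit each k, record the inertia, then plot), with synthetic data standing in for the (returns, vol, volume) matrix:

import numpy as np
from sklearn.cluster import KMeans
from matplotlib import pyplot as plt

X = np.random.rand(100, 3)                      # stand-in for the real feature matrix
distortions = []
for k in range(1, 20):
    k_means = KMeans(n_clusters=k, random_state=0).fit(X)
    distortions.append(k_means.inertia_)        # within-cluster sum of squares

plt.plot(range(1, 20), distortions, marker='o')
plt.xlabel('k')
plt.ylabel('inertia')
plt.show()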
Example #17
0
            nb_eval_examples += ids.size(0)
            nb_eval_steps += 1
        eval_loss = eval_loss / nb_eval_steps
        print("Validation loss: {}".format(eval_loss))
        print("Validation Accuracy: {}".format(eval_accuracy / nb_eval_steps))


if __name__ == '__main__':

    # Preparing for CPU or GPU usage
    dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    tokenizer = BertTokenizer.from_pretrained('./{}'.format(BERT_MODEL_DIR))

    # Creating the Dataset and DataLoader for the neural network
    train_sentences, train_labels = read_data(train_file_path)
    train_labels = [[tag2idx.get(l) for l in lab] for lab in train_labels]
    test_sentences, test_labels = read_data(test_file_path)
    test_labels = [[tag2idx.get(l) for l in lab] for lab in test_labels]
    print("TRAIN Dataset: {}".format(len(train_sentences)))
    print("TEST Dataset: {}".format(len(test_sentences)))

    training_set = CustomDataset(tokenizer, train_sentences, train_labels,
                                 MAX_LEN)
    testing_set = CustomDataset(tokenizer, test_sentences, test_labels,
                                MAX_LEN)

    train_params = {
        'batch_size': TRAIN_BATCH_SIZE,
        'shuffle': True,
        'num_workers': 0
Example #18
0
    def __init__(self):
        super().__init__()
        self.canvas_show = 'pie'  # or 'sleep'

        self.withdraw()
        splash = Splash(self)
        splash.pgb['maximum'] = 5

        import matplotlib
        import math
        self.math = math
        import matplotlib.pyplot as plt
        self.plt = plt
        from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
        from tkinter.ttk import Progressbar, Style

        splash.pgb['value'] = 1
        splash.label.image = splash.gif1
        splash.update()

        plt.style.use('ggplot')

        matplotlib.rcParams['font.family'] = 'SimHei'
        self.Set3 = plt.cm.Set3(range(10))
        self.Paired = plt.cm.Paired(range(10))

        splash.pgb['value'] = 2
        splash.label.image = splash.gif1
        splash.update()

        # +++++++++++++++
        # +  GUI_setup  +
        # +++++++++++++++
        from tkinter import Frame, Text
        from tkinter import ttk
        from tkinter.ttk import Button
        self.title('Weekery')
        #Style().theme_use('clam')
        '''
        root
        |- Progressbar.bottom
        |- Frame_Left
        |  |- Frame_btn_left.top
        |  |  |- left(btn{d,w,m,y})
        |  |  |- right(btn{sleep,freq_pic,freq_bar})
        |  |  |- Frame_btn_mid
        |  |     |- <.left
        |  |     |- +.left
        |  |     |- >.right
        |  |     |- -.right
        |  |     |- calendar.middle
        |  |     |- label_date
        |  |- fig_up.top
        |  |- fig_down.top
        |- Frame_Right
           |- Frame_btn_right
           |  |- btn_setting.right
           |  |- btn_reload.right
           |- Text.bottom.top expand=both
        '''
        # ====== Frames ======
        self.frame_left = Frame(self)

        self.frame_right = Frame(self)

        self.frame_btn_left = Frame(self.frame_left)
        self.frame_btn_left.config(bg='white')
        self.frame_btn_mid = Frame(self.frame_btn_left)

        self.frame_btn_right = Frame(self.frame_right)
        self.frame_btn_right.config(bg='white')

        # ====== Buttons ======
        ttk.Style().configure("TButton", background='white')
        ttk.Style().configure("symbol.TButton", font=(20))
        #ttk.Style().configure("TButton", foreground='white')
        self.btn_days = Button(self.frame_btn_left, text='日', command=self.days, width=3)
        self.btn_days.config(state='disable') #bg='white', 
        self.btn_weeks = Button(self.frame_btn_left, text='周', command=self.weeks, width=3)
        #self.btn_weeks.config(bg='white')
        self.btn_months = Button(self.frame_btn_left, text='月', command=self.months, width=3)
        #self.btn_months.config(bg='white')
        self.btn_years = Button(self.frame_btn_left, text='年', command=self.years, width=3)
        #self.btn_years.config(bg='white')
        self.btn_switch_freq_pie = Button(self.frame_btn_left, text='饼图', command=self.pie, width=6)
        #self.btn_switch_freq_pie.config(bg='white')
        self.btn_switch_sleep = Button(self.frame_btn_left, text='睡眠', command=self.sleep, width=6)
        #self.btn_switch_sleep.config(bg='white')
        self.btn_switch_freq_bar = Button(self.frame_btn_left, text='词频', command=self.bar, width=6)
        #self.btn_switch_freq_bar.config(bg='white')

        self.btn_previous = Button(self.frame_btn_mid, text='◀', style='symbol.TButton', command=self.previous, width=2)
        #self.btn_previous.config(bg='white')
        self.btn_backward = Button(self.frame_btn_mid, text='▶', style='symbol.TButton',command=self.backward, width=2)
        #self.btn_backward.config(bg='white')
        self.btn_calendar = Button(self.frame_btn_mid, text="▦", style='symbol.TButton',command=self.ask_selected_date, width=2)
        #self.btn_calendar.config(bg='white')
        self.btn_plus = Button(self.frame_btn_mid, text='+', style='symbol.TButton', command=self.plus, width=2)
        #self.btn_plus.config(bg='white')
        self.btn_minus = Button(self.frame_btn_mid, text='-',style='symbol.TButton', command=self.minus, width=2)
        #self.btn_minus.config(bg='white')

        self.btn_reload = Button(self.frame_btn_right, text='重载', command=self.reload, width=6)
        #self.btn_reload.config(bg='white')
        self.btn_settings = Button(self.frame_btn_right, text='设置', command=self.settings, width=6)
        #self.btn_settings.config(bg='white')

        # ====== Others ======
        self.fig_up = plt.figure(figsize=(7, 3))
        self.fig_down = plt.figure(figsize=(7, 3))
        self.canvas_up = FigureCanvasTkAgg(self.fig_up, master=self.frame_left)
        self.canvas_down = FigureCanvasTkAgg(self.fig_down, master=self.frame_left)

        self.pgb = Progressbar(self, orient='horizontal', length=1000, mode='determinate')

        self.notes = Text(self.frame_right, width=50)
        self.notes.config(bg='azure')
        
        self.label_date = Label(self.frame_btn_mid, text='加载中...', width=15)

        splash.pgb['value'] = 3
        splash.label.image = splash.gif1
        splash.update()

        # ++++++++++++++++++
        # +  GUI Packing   +
        # ++++++++++++++++++
        # level-1
        self.pgb.pack(side='bottom', fill='both')

        # level-1
        self.frame_left.pack(side='left', fill='both', expand='YES')
        # # level-2
        self.frame_btn_left.pack(side='top', fill='both')
        self.canvas_up.get_tk_widget().pack(side='top', fill='both', expand='YES')
        self.canvas_down.get_tk_widget().pack(side='top', fill='both', expand='YES')
        # # # level-3
        self.btn_days.pack(side='left')
        self.btn_weeks.pack(side='left')
        self.btn_months.pack(side='left')
        self.btn_years.pack(side='left')
        self.btn_switch_freq_bar.pack(side='right')
        self.btn_switch_freq_pie.pack(side='right')
        self.btn_switch_sleep.pack(side='right')
        self.frame_btn_mid.pack(side='top')
        # # # # level-4
        self.btn_previous.pack(side='left')
        self.btn_minus.pack(side='left')
        self.btn_backward.pack(side='right')
        self.btn_plus.pack(side='right')
        self.btn_calendar.pack(side='left')
        self.label_date.pack(side='right')

        # level-1
        self.frame_right.pack(side='left', fill='both', expand='YES')
        # # level-2
        self.frame_btn_right.pack(side='top', fill='both')
        # # # level-3
        self.btn_settings.pack(side='right')
        self.btn_reload.pack(side='right')
        # # level-2
        self.notes.pack(side='top', fill='both', expand='YES')

        splash.pgb['value'] = 4
        splash.label.image = splash.gif1
        splash.update()

        # ++++++++++++++++++
        # +  Import class  +
        # ++++++++++++++++++
        import sqlite3
        from config import Config
        from controls import Controls
        from load_data import wiz_week_index, read_data
        splash.pgb['value'] = 5
        splash.label.image = splash.gif1
        splash.update()
        splash.destroy()
        # ============= Show Main GUI ==============
        self.protocol('WM_DELETE_WINDOW', self.close_window)
        self.wm_state('zoomed')  # maximize windows
        self.deiconify()

        self.cfg = Config(self)
        
        # user chose to cancel in the configuration dialog
        if self.cfg.cancel:
            return
            
        self.db_path = self.cfg.cache_dir + '/weekery.db'
        self.conn = sqlite3.connect(self.db_path)

        self.id_filenames, self.id_dates = wiz_week_index(self.cfg)

        if self.cfg.last_read == 20160000:
            read_data(self, self.cfg, self.pgb, self.id_dates, self.id_filenames, 'all')
        else:
            read_data(self, self.cfg, self.pgb, self.id_dates, self.id_filenames, dialog=False)

        self.controls = Controls(self.conn)
        self.conn.commit()

        self.colors = {v: (int(f[4:7])/255, int(f[9:12])/255, int(f[14:17])/255, 1) for f, v in self.cfg.color_kind.items()}
        self._paint()
Example #19
0
 def reload(self):
     reload_option = ReloadOption()
     self.wait_window(reload_option)
     if reload_option.reload_mode:
         if reload_option.reload_mode == '全部重载':
             read_data(self, self.cfg, self.pgb, self.id_dates, self.id_filenames, 'all', dialog=False)
             showinfo('提示', '全部数据重载完成!')
             self.weeks()
         elif reload_option.reload_mode == '最近一周':
             read_data(self, self.cfg, self.pgb, self.id_dates, self.id_filenames, 1, dialog=False)
             showinfo('提示', '最近一周数据重载完成!')
             self.weeks()
         elif reload_option.reload_mode == '最近一个月':
             read_data(self, self.cfg, self.pgb, self.id_dates, self.id_filenames, 4, dialog=False)
             showinfo('提示', '最近一个月数据重载完成!')
             self.weeks()
         elif reload_option.reload_mode == '最近三个月':
             read_data(self, self.cfg, self.pgb, self.id_dates, self.id_filenames, 12, dialog=False)
             showinfo('提示', '最近三个月数据重载完成!')
             self.weeks()
         elif reload_option.reload_mode == '最近半年':
             read_data(self, self.cfg, self.pgb, self.id_dates, self.id_filenames, 26, dialog=False)
             showinfo('提示', '最近半年数据重载完成!')
             self.weeks()
         elif reload_option.reload_mode == '最近一年':
             read_data(self, self.cfg, self.pgb, self.id_dates, self.id_filenames, 52, dialog=False)
             showinfo('提示', '最近一年数据重载完成!')
             self.weeks()
         else:
             pass
Example #20
0
			weights=self.weights,
			biases=self.biases,
			layers=self.layers,
			epochs=self.epochs,
			learning_rate=self.learning_rate
		)

	def load_model(self, filename='model.npz'):
		model = np.load(os.path.join(os.curdir, 'models', filename))
		self.weights = model['weights']
		self.biases = model['biases']
		self.layers = model['layers']
		self.num_layers = len(self.layers)
		self.epochs = model['epochs']
		self.learning_rate = model['learning_rate']
		self.activations = [np.zeros((x, 1)) for x in self.layers]

load_data.download_data()
training_images, training_labels, test_images, test_labels = load_data.read_data()

nn = NNClassifier(n_features=N_FEATURES,
                  layers = [N_FEATURES, 30, 10],
                  l2 = 0.5,
                  epochs = 30,
                  learning_rate = 0.001)

nn.fit(training_images, training_labels, test_images, test_labels)



Example #21
0
from load_data import read_data, load_last_user_logs, get_num_user_logs

## ========================= 1. Load and clean data ======================== ##

'''
train = pd.read_csv('../01_Data/train.csv')
train = pd.concat((train, pd.read_csv('../01_Data/train_v2.csv')), axis=0, ignore_index=True).reset_index(drop=True)
test = pd.read_csv('../01_Data/sample_submission_v2.csv')
members = pd.read_csv('../01_Data/members_v3.csv')

transactions = pd.read_csv('../01_Data/transactions.csv')
transactions = pd.concat((transactions, pd.read_csv('../01_Data/transactions_v2.csv')), axis=0, ignore_index=True).reset_index(drop=True)
'''

print("\n1. Load and data ...\n")
train, test, members, transactions = read_data()

## ========================= 2. Feature engineering ======================== ##
print("\n2. Adding and selecting features ...\n")

# Prepare transactions
current_transactions = transactions.sort_values(by=['transaction_date'], ascending=[False]).reset_index(drop=True)

# Get features for current transaction
print("\n   a) Creating features from most recent transaction ...\n")
# get most recent transaction
current_transactions = current_transactions.drop_duplicates(subset=['msno'], keep='first')
# Calculate discount
current_transactions['discount'] = current_transactions['plan_list_price'] - current_transactions['actual_amount_paid']
# Calculate cost per day
current_transactions['payment_plan_days'] = current_transactions['payment_plan_days'].replace(0, 30)
Example #22
0
from sklearn.neighbors import KNeighborsClassifier


def doWork(train, test, labels):
    print("Converting training to matrix")
    train_mat = np.mat(train)
    print("Fitting knn")
    knn = KNeighborsClassifier(n_neighbors=10, algorithm="kd_tree")
    print(knn.fit(train_mat, labels))
    print("Preddicting")
    predictions = knn.predict(test)
    print("Writing to file")
    write_to_file(predictions)
    return predictions


def write_to_file(predictions):
    f = open("output-knn-skilearn.csv", "w")
    for p in predictions:
        f.write(str(p))
        f.write("\n")
    f.close()


if __name__ == '__main__':
    from load_data import read_data
    train, labels = read_data("train.csv")
    test, tmpl = read_data("test.csv", test=True)
    predictions = doWork(train, test, labels)
    print(predictions)
Example #23
0
# -*- coding: utf-8 -*-
# @Time    : 2021/1/26 13:55
# @Author  : ztwu4
# @Email   : [email protected]
# @File    : test.py
# @Software: PyCharm

# test sentence
from load_data import read_data
from model_train import PreProcessInputData
from util import train_file_path, test_file_path

text = "经过工作人员两天的反复验证、严密测算,记者昨天从上海中心大厦得到确认:被誉为上海中心大厦“定楼神器”的阻尼器,在8月10日出现自2016年正式启用以来的最大摆幅。"
word_labels, seq_types = PreProcessInputData([text])
print(word_labels)
print(seq_types)

input_train, result_train = read_data(train_file_path)
for sent, tag in zip(input_train[:10], result_train[:10]):
    print(sent, tag)
    print "Fitting kNN with k=10, kd_tree"
    knn = KNeighborsClassifier(n_neighbors=10, algorithm="kd_tree")
    print knn.fit(X_train_reduced, labels)

    print "Reducing test to %d components" % PCA_COMPONENTS
    X_test_reduced = pca.transform(test)

    print "Preddicting numbers"
    predictions = knn.predict(X_test_reduced)

    print "Writing to file"
    write_to_file(predictions)

    return predictions


def write_to_file(predictions):
    f = open("output-pca-knn-skilearn-v3.csv", "w")
    for p in predictions:
        f.write(str(p))
        f.write("\n")
    f.close()


if __name__ == '__main__':
    from load_data import read_data
    train, labels = read_data("../data/train.csv")
    test, tmpl = read_data("../data/output3.csv", test=True)
    print tmpl
    print doWork(train, labels, test)
Example #25
0
        for item in line:
            tag.append(int(label_id_dict[item.strip()]))
        tag.append(0)
        tags.append(tag)

    pad_tags = pad_sequences(tags,
                             maxlen=MAX_SEQ_LEN,
                             padding="post",
                             truncating="post")
    result_tags = np.expand_dims(pad_tags, 2)
    return result_tags


if __name__ == '__main__':
    # read the training and test data
    input_train, result_train = read_data(train_file_path)
    input_test, result_test = read_data(test_file_path)
    for sent, tag in zip(input_train[:10], result_train[:10]):
        print(sent, tag)
    for sent, tag in zip(input_test[:10], result_test[:10]):
        print(sent, tag)

    # training set
    input_train_labels, input_train_types = PreProcessInputData(input_train)
    print(input_train_types[0])
    result_train = PreProcessOutputData(result_train)
    # test set
    input_test_labels, input_test_types = PreProcessInputData(input_test)
    result_test = PreProcessOutputData(result_test)
    # add warmup
    total_steps, warmup_steps = calc_train_steps(
    print "Fitting kNN with k=10, kd_tree"
    knn = KNeighborsClassifier(n_neighbors=10, algorithm="kd_tree")
    print knn.fit(X_train_reduced, labels)

    print "Reducing test to %d components" % PCA_COMPONENTS
    X_test_reduced = pca.transform(test)

    print "Preddicting numbers"
    predictions = knn.predict(X_test_reduced)

    print "Writing to file"
    write_to_file(predictions)

    return predictions


def write_to_file(predictions):
    f = open("output-pca-knn-skilearn-v3.csv", "w")
    for p in predictions:
        f.write(str(p))
        f.write("\n")
    f.close()


if __name__ == '__main__':
    from load_data import read_data
    train, labels = read_data("/home/prasad/kaggle-digit-recognizer-master/data/train.csv")
    test, tmpl = read_data("/home/prasad/kaggle-digit-recognizer-master/data/test.csv", test=True)
    print doWork(train, labels, test)
Example #27
0
from adaboost import adaboost_trian,adaClassify
from load_data import read_data
if __name__ =='__main__':
    data,label = read_data()
    classifier_array = adaboost_trian(data,label,9)
    # print(classifier_array)
    re = adaClassify([[5,5],[0,0]],classifier_array)
    print(re)
Example #28
0
from feature_selection import generate_random_solution
from feature_selection import sort_pop
from feature_selection import random_crossover
from feature_selection import mutation
from feature_selection import half_crossover
from feature_selection import remove_duplicates
import pandas as pd
import random
import time
import matplotlib.pyplot as plt

# Set random seed
random.seed(1)
time.sleep(1)

dataset = read_data()
dataset = clean_data(dataset)
print("!!!!!!!!!")
print(len(dataset.index))

print("Fitness if using all features", fitness_function(dataset))

# Use the standard 13 features as a benchmark to measure against
standard_dataset = dataset[[
    "age", "sex", "CP", "trestbps", "chol", "FBS", "restecg", "thalach",
    "exang", "oldpeak", "slope", "ca", "thal", "num"
]]

print("Fitness of the standard features typically included",
      fitness_function(standard_dataset))
Example #29
0
    return sorted(a.iteritems(), key=operator.itemgetter(1), reverse=True)[0][0]


def doWorkNumpy(train, test, labels):
    k = 20
    train_mat = np.mat(train)
    output_file = open("output-numpy2.csv", "w", 0)
    idx = 0
    size = len(test)
    for test_sample in test:
        idx += 1
        start = time.time()
        knn = np.argsort(np.sum(np.power(np.subtract(train_mat, test_sample), 2), axis=1), axis=0)[:k]
        s = time.time()
        prediction = majority_vote(knn, labels)
        output_file.write(str(prediction))
        output_file.write("\n")
        print "Knn: %f, majority %f" % (time.time() - start, time.time() - s)
        print "Done: %f" % (float(idx) / size)
    output_file.close()
    output_file = open("done.txt", "w")
    output_file.write("DONE")
    output_file.close()


if __name__ == '__main__':
    from load_data import read_data
    train, labels = read_data("../data/train.csv")
    test, tmpl = read_data("../data/test.csv", test=True)
    doWorkNumpy(train, test, labels)
Example #30
0
import math

import tensorflow as tf

from load_data import read_data
from network import Model

train_files, train_labels, val_files, val_labels = read_data(no_of_train=275)

filenames = tf.constant(train_files, dtype=tf.string)
labels = tf.constant(train_labels, dtype=tf.float32)

val_filenames = tf.constant(val_files, dtype=tf.string)
val_labels = tf.constant(val_labels, dtype=tf.float32)

MIN_VAL = math.inf
EPOCHS = 100
BATCHES = 1
NO_OF_ITERS = int(filenames.get_shape()[0]) // BATCHES
LOG_DIR = '/tmp'
SAVE_DIR = '/tmp/macula-iqa.cpkt'
LEARNING_RATE = 1e-3
DROPOUT_PROB = 0.5

sess = tf.Session()


def _build_dataset(_filenames, _labels, epochs, batches):
    dataset = tf.data.Dataset.from_tensor_slices((_filenames, _labels))
    dataset = dataset.prefetch(100)
    dataset = dataset.map(_parse_function, 10)
Example #31
0
import numpy as np
from matplotlib.mlab import PCA as mlabPCA
import matplotlib.pyplot as plt
from load_data import read_data
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d


all_samples = read_data("data/train1.csv")

y_train = np.array([x[0] for x in all_samples])
X_train = np.array([x[1:] for x in all_samples])
	
data_array = X_train
mlab_pca = mlabPCA(data_array)

Class0 = [i for i in range(len(y_train)) if y_train[i]==0 ]
Class1 = [i for i in range(len(y_train)) if y_train[i]==1 ]

fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111, projection='3d')
ax.plot(mlab_pca.Y[Class0,0], mlab_pca.Y[Class0,1],mlab_pca.Y[Class0,2], 'o', markersize=8, color='blue', alpha=0.5, label='class1')
ax.plot(mlab_pca.Y[Class1,0], mlab_pca.Y[Class1,1],mlab_pca.Y[Class1,2], '^', markersize=8, alpha=0.5, color='red', label='class2')


#plt.plot(mlab_pca.Y[Class0,0],mlab_pca.Y[Class0,1],mlab_pca.Y[Class0,2] ,'o', markersize=7,color='blue', alpha=0.5, label='class1')
#plt.plot(mlab_pca.Y[Class1,0], mlab_pca.Y[Class1,1],mlab_pca.Y[Class1,2], '^', markersize=7,color='red', alpha=0.5, label='class2')

plt.show()
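matplotlib.mlab.PCA has been removed from recent Matplotlib releases; a hedged equivalent of the 3-D scatter above using sklearn.decomposition.PCA, with synthetic data standing in for the CSV:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
X_train = rng.normal(size=(200, 10))            # stand-in for the feature matrix
y_train = rng.integers(0, 2, size=200)          # stand-in for the binary labels

proj = PCA(n_components=3).fit_transform(X_train)

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')
ax.plot(proj[y_train == 0, 0], proj[y_train == 0, 1], proj[y_train == 0, 2],
        'o', markersize=8, color='blue', alpha=0.5, label='class1')
ax.plot(proj[y_train == 1, 0], proj[y_train == 1, 1], proj[y_train == 1, 2],
        '^', markersize=8, color='red', alpha=0.5, label='class2')
ax.legend()
plt.show()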
Example #32
0
def further_process_labels(label_list, data_file):
    entity_list, token_list, tag_list = load_data.read_data(data_file)
    for idx in range(len(label_list)):
        manual_rule(entity_list[idx], label_list[idx])
    return label_list
Example #33
0
File: main.py Project: gtc0315/p2
#####User input: choose a dataset here#####
dataset = "13"
training = False  # change to False if there is no vicon data
###########################################

if __name__ == '__main__':
    start = timeit.default_timer()  # start timer

    ifile = "imu/imuRaw" + dataset + ".p"
    if training:
        vfile = "vicon/viconRot" + dataset + ".p"

    # load data
    ts = load_data.tic()
    imud = load_data.read_data(ifile)
    if training:
        vicd = load_data.read_data(vfile)
    load_data.toc(ts, "Data import")

    # format data
    imu_vals = imud['vals']
    imu_ts = imud['ts']
    imu_n = len(imu_ts[0])
    if training:
        vic_vals = vicd['rots']
        vic_ts = vicd['ts']
        vic_n = len(vic_ts[0])
    bias, scale = ukf_lib.bias_scale(imu_vals, 100)
    imu_vals = ukf_lib.unbias_reorder(imu_vals, imu_n, bias, scale)