from flask import redirect, url_for

# create_data is assumed to be imported from the surrounding project.

def create_left_data(directory):
    # Capture data for the 'left' orientation, then redirect to the next step.
    flag = 'left'
    create_data(directory, flag)
    print('folder name', directory)
    print("done")
    return redirect(url_for('right_face', directory=directory))

def create_up_data(directory):
    # Capture data for the 'up' orientation, then redirect to the next step.
    flag = 'up'
    create_data(directory, flag)
    print('folder name', directory)
    print("done")
    return redirect(url_for('down_face', directory=directory))

def create_down_data(directory):
    # Capture data for the 'down' orientation, then send the user back home.
    flag = 'down'
    create_data(directory, flag)
    print('folder name', directory)
    print("done")
    auth_name = 'again'
    return redirect(url_for('home', auth_name=auth_name))
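These helpers depend on endpoints named right_face, down_face, and home; a minimal sketch of how they might be registered as Flask routes (the app object, URL paths, and endpoint names are assumptions inferred from the url_for() calls above, not the original author's code):

from flask import Flask

app = Flask(__name__)

# Hypothetical wiring; a right_face endpoint (not shown in the snippet)
# would sit between left and up in the capture chain.
@app.route('/left_face/<directory>')
def left_face(directory):
    return create_left_data(directory)

@app.route('/up_face/<directory>')
def up_face(directory):
    return create_up_data(directory)

@app.route('/down_face/<directory>')
def down_face(directory):
    return create_down_data(directory)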
Example 4
import argparse  # needed for the CLI below

def main():
    # train_model, load_model, evaluate_predictions, plot_predictions and the
    # NUMBER_OF_EPOCHS / BATCH_SIZE constants come from the surrounding project.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--mode",
        help="mode to run the model. options: [train, test]")
    args = parser.parse_args()
    if args.mode == 'train':
        print("Creating data....")
        create_data()
        print("Fitting Model...")
        model = train_model(nb_epoch=NUMBER_OF_EPOCHS, nb_batch=BATCH_SIZE)
    elif args.mode == 'test':
        model = load_model('lstm.h5')
        predictions = evaluate_predictions(model)
        plot_predictions(predictions)
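Since an unrecognized --mode value falls through both branches and silently does nothing, argparse's choices option is a compact guard; a hedged variant of the argument definition (a sketch, not the original code):

parser.add_argument(
    "--mode",
    choices=["train", "test"],  # argparse rejects any other value
    help="mode to run the model. options: [train, test]")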
Example 5
def compare_errors(k_vals, input_data_file):
    ## read in the input data
    initial_data = create_data(input_data_file)

    ## create plots of the data (this should save the images within the current
    ## directory)
    plot_data(initial_data)

    ## integerize the data labels
    integerized_data, label_dict = integerize_labels(initial_data)

    ## split the data into train and test
    train, test = split(integerized_data)

    ## compute the errors
    errors = {}
    for k in k_vals:
        predicted_labels = knn(train, test, k)
        error_rate = calculate_error_rate(predicted_labels, test)
        errors[k] = error_rate

    ## BONUS: weighting
    for k in k_vals:
        weighted_predicted_labels = weighted_knn(train, test, k)
        weighted_error_rate = calculate_error_rate(weighted_predicted_labels,
                                                   test)
        print("Weighted error value for k = %d was %f" %
              (k, weighted_error_rate))

    return errors
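compare_errors returns the dict of per-k error rates, so a caller can compare settings of k directly; a hedged driver, reusing the "iris" data-set name from the integerize_labels example below (the k grid is a placeholder):

if __name__ == '__main__':
    errors = compare_errors(k_vals=[1, 3, 5, 7], input_data_file='iris')
    for k in sorted(errors):
        print("Error value for k = %d was %f" % (k, errors[k]))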
import os

import pandas as pd

import create_data  # project module providing clean, collect_important_chars, etc.

def split_kfold(input_dir, output_dir, keep_none):
    train_data = pd.read_csv('./data/Train_Data.csv',
                             sep=',',
                             dtype=str,
                             encoding='utf-8')
    test_data = pd.read_csv('./data/Test_Data.csv',
                            sep=',',
                            dtype=str,
                            encoding='utf-8')

    train_data.fillna('', inplace=True)
    test_data.fillna('', inplace=True)

    train_data['cleaned_text'] = train_data['text'].apply(create_data.clean)
    train_data['cleaned_title'] = train_data['title'].apply(create_data.clean)
    test_data['cleaned_text'] = test_data['text'].apply(create_data.clean)
    test_data['cleaned_title'] = test_data['title'].apply(create_data.clean)

    important_chars = create_data.collect_important_chars(
        train_data['unknownEntities'])

    create_data.remove_chars(train_data, test_data)

    dev_kfolds = []
    train_kfolds = []

    for k in range(FOLD):  # FOLD is assumed to be a module-level constant
        # Reshuffle with a per-fold seed, then hold out the last 100 rows as dev.
        train_data = train_data.sample(
            frac=1, random_state=2018 - k).reset_index(drop=True)
        dev_kfolds.append(train_data.tail(100))
        train_kfolds.append(train_data.head(train_data.shape[0] - 100))

    for k in range(FOLD):
        kfold_dir = os.path.join(output_dir, 'fold{}'.format(k))
        if not os.path.isdir(kfold_dir):
            os.mkdir(kfold_dir)
        create_data.create_data(dev_kfolds[k], f'{kfold_dir}/dev.txt',
                                important_chars, True, keep_none)
        create_data.create_data(train_kfolds[k], f'{kfold_dir}/train.txt',
                                important_chars, False, keep_none)
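Note that os.mkdir only creates the leaf fold directory, so output_dir itself must already exist; a hedged driver that ensures that first (the paths are placeholders):

if __name__ == '__main__':
    out_dir = './kfold_output'           # placeholder output path
    os.makedirs(out_dir, exist_ok=True)  # create parent dirs as needed
    split_kfold('./data', out_dir, keep_none=False)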
Example 7
    # (snippet begins mid-statement: the tail of a list of data-set names)
                                   'Validation',
                                   'Test Laptop (General)',
                                   'Test Laptop (Same Title) (Space)',
                                   'Test Laptop (Same Title) (No Space)',
                                   'Test Laptop (Different Title) (Space)',
                                   'Test Laptop (Different Title) (No Space)']})

    print('\nOutputting models to {} with base name {}\n'.format(folder, model_name))

    # Create the folder for the model if it doesn't already exist
    if not os.path.exists('models/{}'.format(folder)):
        os.mkdir('models/{}'.format(folder))

    # Create the data if it doesn't exist
    if not os.path.exists('data/train/total_data.csv') or not os.path.exists('data/test/final_laptop_test_data.csv'):
        create_data()

    # Load the data
    train_data = pd.read_csv('data/train/total_data.csv', nrows=TRAIN_SIZE, chunksize=BATCH_SIZE)
    val_data = pd.read_csv('data/train/total_data.csv', skiprows=TRAIN_SIZE, names=['title_one', 'title_two', 'label', 'index'])
    del val_data['index']
    val_data = val_data.to_numpy()
    val_labels = val_data[:, 2].astype('float32')
    val_data = val_data[:, 0:2]

    test_laptop_data, test_laptop_labels = split_test_data(pd.read_csv('data/test/final_laptop_test_data.csv')) # General laptop test data
    test_gb_space_data, test_gb_space_labels = split_test_data(pd.read_csv('data/test/final_gb_space_laptop_test.csv')) # Same titles; Substituted storage attributes
    test_gb_no_space_data, test_gb_no_space_labels = split_test_data(pd.read_csv('data/test/final_gb_no_space_laptop_test.csv')) # Same titles; Substituted storage attributes
    test_retailer_gb_space_data, test_retailer_gb_space_labels = split_test_data(pd.read_csv('data/test/final_retailer_gb_space_test.csv')) # Different titles; Substituted storage attributes
    test_retailer_gb_no_space_data, test_retailer_gb_no_space_labels = split_test_data(pd.read_csv('data/test/final_retailer_gb_no_space_test.csv')) # Different titles; Substituted storage attributes
    print('Loaded all test files')
Example 8
from create_data import create_data

def integerize_labels(data):
    """Take a numpy matrix and return an identical numpy array except with
    integerized labels, plus a dictionary of the label-int pairs."""

    # build the label -> int dictionary from the last column
    label_dict = {}
    labelnum = data.shape[1] - 1
    count = 0
    for rownum in range(len(data)):
        label = data[rownum, labelnum]
        if label not in label_dict:
            label_dict[label] = count
            count += 1

    # copy so the caller's array is not mutated in place
    integerized_data = data.copy()

    # integerize labels
    # thanks mdml
    # http://stackoverflow.com/questions/19666626
    for label in label_dict:
        integerized_data[integerized_data == label] = label_dict[label]

    return (integerized_data, label_dict)

if __name__ == '__main__':
    irisdata = create_data("iris")
    data, labeldict = integerize_labels(irisdata)
    print(data)
    print(labeldict)
Example 9
        hist_kws={"cumulative": True},
        kde_kws={"cumulative": True},
    )

    plt.legend()
    plt.show()

    print("Diff ill:", ill_diff)
    print("Diff Control:", control_diff)

    numpy.savetxt("1-pca.csv", ill_diff, delimiter=",")
    numpy.savetxt("2-pca.csv", control_diff, delimiter=",")


if __name__ == "__main__":
    data = cd.create_data()
    mean = cd.mean_diff(data)

    # Select the ill and control patients and check their diff from the mean
    ill = data[data["K760"] == 2]
    control = data[data["K760"] == 1]

    dataFrame = data[data["K760"] != 3]

    background = data[(data["K760"] == 3) | (data["D50*"] == 3)]
    background = background.values

    cpca(ill, control, dataFrame, background)
    # pca(ill, control, mean)

    # K-S Test
Example 10
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("Receiver Operating Characteristic (ROC) Curve")
    plt.legend()
    plt.show()


def plot_confusion_matrix(con_matrix):
    class_names = [1, 2]  # names of the classes
    fig, ax = plt.subplots()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names)
    plt.yticks(tick_marks, class_names)

    # create heatmap
    sns.heatmap(pd.DataFrame(con_matrix), annot=True, cmap="YlGnBu", fmt="g")
    ax.xaxis.set_label_position("top")
    plt.tight_layout()
    plt.title("Confusion matrix", y=1.1)
    plt.ylabel("Actual label")
    plt.xlabel("Predicted label")

    plt.show()
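
plot_confusion_matrix only renders a precomputed matrix; a hedged sketch of producing one with scikit-learn (y_true and y_pred are placeholders):

from sklearn.metrics import confusion_matrix

y_true = [1, 2, 2, 1, 2]  # placeholder actual labels
y_pred = [1, 2, 1, 1, 2]  # placeholder predicted labels
plot_confusion_matrix(confusion_matrix(y_true, y_pred))  # rows: actual, cols: predicted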


if __name__ == "__main__":
    df = cd.create_data()

    logistic_regression_cpca(df)
    logistic_regression_pca(df)
Example 11

def test(corn_weights, scratch_corn_weights):
    start = time.time()

    test_net, accuracy = eval_corn_net(corn_weights)
    print('Accuracy, trained from ImageNet initialization: %3.1f%%' %
          (100 * accuracy, ))
    scratch_test_net, scratch_accuracy = eval_corn_net(scratch_corn_weights)
    print('Accuracy, trained from random initialization: %3.1f%%' %
          (100 * scratch_accuracy, ))

    end = time.time()
    print('The testing stage took %.2fs' % (end - start))


if __name__ == '__main__':
    # set caffe
    caffe.set_mode_cpu()
    # caffe.set_mode_gpu()
    # caffe.set_device(0)

    data_path = input("Please input image path: ")
    create_data(data_path)

    weights = 'model/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
    niter = input("Please input iterations in training: ")
    corn_weights, scratch_corn_weights = train(int(niter), weights)

    test(corn_weights, scratch_corn_weights)
Example 12
from create_initial_matrix import create_initial_matrix
from copying import turing_pattern
from create_data import create_data
from optimising import optimise
from MCMC import MCMC_main, MCMC_save_plot
from turing_class import NumericalSolver

a = NumericalSolver()
a.plot_fig()

size = 100  # size of matrix dimensions
time_lst = [0.0, 1.0, 2.0, 3.0, 4.0,
            5.0]  # times that we might want to solve for

create_initial_matrix(size)  # create initial matrix
turing_pattern(a=2.8e-4, b=5e-3, tau=2, k=0)  # making clean data
create_data(time_lst)  # making noisy data
a_est, b_est = optimise(time=1.0,
                        a_initial=0.0,
                        a_final=1e-3,
                        b_initial=0.0,
                        b_final=1e-2,
                        iters=10)  # rough parameter recovery of a and b
res, final_guess = MCMC_main(a_init=a_est,
                             b_init=b_est,
                             a_max=1e-3,
                             b_max=1e-2,
                             iters=1000)  # this is quite slow
MCMC_save_plot(res)
Example 13
    for i_method in methods_to_run:
        method_t = methods_opt[i_method]
        args.model_type = method_t[1]
        args.estimation_type = method_t[2]

        for i_samp_num, samp_num in enumerate(num_train_samp_grid):
            args.n_train = samp_num

            for i_rep in range(n_rep):
                print('Method {} out of {}'.format(i_method + 1, n_methods))
                print('Sample size {} out of {}'.format(
                    i_samp_num + 1, n_samp_grid))
                print('Replication {} out of {}'.format(i_rep + 1, n_rep))

                # Create Data
                train_set, test_set = create_data(args)

                # Learning
                err_mat[i_method, i_samp_num,
                        i_rep] = learn_latent_model(args, train_set, test_set)
        # save temp results file
        pickle.dump(
            {
                'general_args': general_args,
                'methods_opt': methods_opt,
                'err_mat': err_mat,
                'num_train_samp_grid': num_train_samp_grid
            }, open(save_file + '_Temp_{}'.format(i_method), "wb"))

    pickle.dump(
        {
            'general_args': general_args,
            'methods_opt': methods_opt,
            'err_mat': err_mat,
            'num_train_samp_grid': num_train_samp_grid
        }, open(save_file, "wb"))  # final results file, mirroring the temp dumps
Example 14
def data(symbol):
    # Window start is actually five hours back, despite the variable name.
    last_hour = (datetime.now() - timedelta(hours=5)).replace(microsecond=0)
    now = datetime.now().replace(microsecond=0)
    last_hour_data = create_data.create_data(symbol, last_hour, now)
    return last_hour_data
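A hedged usage sketch; the ticker symbol is a placeholder and create_data.create_data is assumed to return the fetched rows:

if __name__ == '__main__':
    print(data('AAPL'))  # placeholder symbol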
Example 15
TOKEN = settings['TOKEN']

if __name__ == '__main__':
    print(
        'Script for collecting information about other anime communities. Commands:'
    )
    print('1. create_data - build the initial database')
    print('2. create_table - build a convenient table from the initial database')
    print('3. pubs_info - detailed information about each community')
    print('4. Press Enter to exit')

    while True:
        command = input('Enter a command: ')

        if command == 'create_data':
            create_data.create_data(PUBS, TOKEN)
            print('\nInitial database created')
        elif command == 'create_table':
            DATA = codecs.open(os.path.join(r'.\pub_info', 'data.json'),
                               'r',
                               encoding='utf-8')
            create_table.create_table(DATA)
            print('\nInitial table created')
        elif command == 'pubs_info':
            DATA = codecs.open(os.path.join(r'.\pub_info', 'data.json'),
                               'r',
                               encoding='utf-8')
            pubs_info.pubs_info(TOKEN, DATA)
            print('\nDetailed community statistics created')
        elif command == '':
            sys.exit()