def create_left_data(directory):
    """Capture 'left'-orientation face data into *directory*, then redirect
    to the right-face capture step.

    NOTE(review): looks like a Flask view (uses redirect/url_for) --
    presumably registered with a route decorator elsewhere; confirm.
    """
    flag = 'left'
    create_data(directory, flag)
    print('folder name', directory)  # debug trace of the target folder
    print("done")
    return redirect(url_for('right_face', directory=directory))
def create_up_data(directory):
    """Capture 'up'-orientation face data into *directory*, then redirect
    to the down-face capture step.

    NOTE(review): looks like a Flask view (uses redirect/url_for) --
    presumably registered with a route decorator elsewhere; confirm.
    """
    flag = 'up'
    create_data(directory, flag)
    print('folder name', directory)  # debug trace of the target folder
    print("done")
    return redirect(url_for('down_face', directory=directory))
def create_down_data(directory):
    """Capture 'down'-orientation face data into *directory* (the final
    capture step), then redirect back to the home view.

    NOTE(review): looks like a Flask view (uses redirect/url_for) --
    presumably registered with a route decorator elsewhere; confirm.
    """
    flag = 'down'
    create_data(directory, flag)
    print('folder name', directory)  # debug trace of the target folder
    auth_name = 'again'  # signals the home view that capture just finished
    print("done")
    return redirect(url_for('home', auth_name=auth_name))
def main():
    """Command-line entry point: train the LSTM model or evaluate a saved one.

    ``--mode train`` builds the dataset and fits a model; ``--mode test``
    loads ``lstm.h5``, evaluates it and plots the predictions.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--mode",
        # Reject anything other than the two supported modes up front; the
        # original silently did nothing for an unrecognized value.
        choices=["train", "test"],
        help="mode to run the model. options: [train, test]")
    args = parser.parse_args()
    if args.mode == 'train':
        print("Creating data....")
        create_data()
        print("Fitting Model...")
        # Return value (the fitted model) is not needed here; train_model is
        # expected to persist its own artifacts.
        train_model(nb_epoch=NUMBER_OF_EPOCHS, nb_batch=BATCH_SIZE)
    elif args.mode == 'test':
        model = load_model('lstm.h5')
        predictions = evaluate_predictions(model)
        plot_predictions(predictions)
def compare_errors(k_vals, input_data_file):
    """Compute kNN error rates for each k in *k_vals*.

    Reads the dataset from *input_data_file*, saves exploratory plots,
    integerizes the labels, splits into train/test, and returns a dict
    mapping k -> error rate for plain kNN. As a bonus, the weighted-kNN
    error for each k is printed (not returned).
    """
    initial_data = create_data(input_data_file)
    # Plot images are written into the current directory by plot_data.
    plot_data(initial_data)
    integerized_data, label_dict = integerize_labels(initial_data)
    train, test = split(integerized_data)
    # Plain kNN: one error rate per requested k.
    errors = {
        k: calculate_error_rate(knn(train, test, k), test)
        for k in k_vals
    }
    # BONUS: distance-weighted kNN, reported on stdout only.
    for k in k_vals:
        weighted_rate = calculate_error_rate(
            weighted_knn(train, test, k), test)
        print("Weighted error value for k = %d was %f" % (k, weighted_rate))
    return errors
def split_kfold(input_dir, output_dir, keep_none):
    """Clean the raw train/test CSVs and write FOLD train/dev splits.

    Each fold ``k`` gets its own subdirectory ``output_dir/fold{k}``
    containing ``dev.txt`` and ``train.txt`` written by
    ``create_data.create_data``.

    NOTE(review): ``input_dir`` is never used -- the CSV paths below are
    hard-coded to ``./data/`` -- confirm whether it should drive them.
    """
    # Read everything as strings; missing cells become '' rather than NaN.
    train_data = pd.read_csv('./data/Train_Data.csv', sep=',', dtype=str,
                             encoding='utf-8')
    test_data = pd.read_csv('./data/Test_Data.csv', sep=',', dtype=str,
                            encoding='utf-8')
    train_data.fillna('', inplace=True)
    test_data.fillna('', inplace=True)
    # Normalized text/title columns produced by the create_data helpers.
    train_data['cleaned_text'] = train_data['text'].apply(create_data.clean)
    train_data['cleaned_title'] = train_data['title'].apply(create_data.clean)
    test_data['cleaned_text'] = test_data['text'].apply(create_data.clean)
    test_data['cleaned_title'] = test_data['title'].apply(create_data.clean)
    # Characters worth keeping, derived from the labelled entities column.
    important_chars = create_data.collect_important_chars(
        train_data['unknownEntities'])
    create_data.remove_chars(train_data, test_data)
    dev_kfolds = []
    train_kfolds = []
    for k in range(FOLD):
        # Reshuffle the full frame (seed varies with k) and carve off the
        # last 100 rows as that fold's dev set.
        # NOTE(review): folds come from repeated reshuffles of the same
        # frame, so dev sets of different folds can overlap -- this is not
        # a disjoint k-fold partition; confirm that is intended.
        train_data = train_data.sample(
            frac=1, random_state=2018 - k).reset_index(drop=True)
        dev_kfolds.append(train_data.tail(100))
        train_kfolds.append(train_data.head(train_data.shape[0] - 100))
    for k in range(FOLD):
        kfold_dir = os.path.join(output_dir, 'fold{}'.format(k))
        if not os.path.isdir(kfold_dir):
            os.mkdir(kfold_dir)
        # The boolean flag distinguishes dev (True) from train (False) output.
        create_data.create_data(dev_kfolds[k], f'{kfold_dir}/dev.txt',
                                important_chars, True, keep_none)
        create_data.create_data(train_kfolds[k], f'{kfold_dir}/train.txt',
                                important_chars, False, keep_none)
'Validation', 'Test Laptop (General)', 'Test Laptop (Same Title) (Space)', 'Test Laptop (Same Title) (No Space)', 'Test Laptop (Different Title) (Space)', 'Test Laptop (Different Title) (No Space)']}) print('\nOutputing models to {} with base name {}\n'.format(folder, model_name)) # Create the folder for the model if it doesn't already exist if not os.path.exists('models/{}'.format(folder)): os.mkdir('models/{}'.format(folder)) # Create the data if it doesn't exist if not os.path.exists('data/train/total_data.csv') or not os.path.exists('data/test/final_laptop_test_data.csv'): create_data() # Load the data train_data = pd.read_csv('data/train/total_data.csv', nrows=TRAIN_SIZE, chunksize=BATCH_SIZE) val_data = pd.read_csv('data/train/total_data.csv', skiprows=TRAIN_SIZE, names=['title_one', 'title_two', 'label', 'index']) del val_data['index'] val_data = val_data.to_numpy() val_labels = val_data[:, 2].astype('float32') val_data = val_data[:, 0:2] test_laptop_data, test_laptop_labels = split_test_data(pd.read_csv('data/test/final_laptop_test_data.csv')) # General laptop test data test_gb_space_data, test_gb_space_labels = split_test_data(pd.read_csv('data/test/final_gb_space_laptop_test.csv')) # Same titles; Substituted storage attributes test_gb_no_space_data, test_gb_no_space_labels = split_test_data(pd.read_csv('data/test/final_gb_no_space_laptop_test.csv')) # Same titles; Substituted storage attributes test_retailer_gb_space_data, test_retailer_gb_space_labels = split_test_data(pd.read_csv('data/test/final_retailer_gb_space_test.csv')) # Different titles; Substituted storage attributes test_retailer_gb_no_space_data, test_retailer_gb_no_space_labels = split_test_data(pd.read_csv('data/test/final_retailer_gb_no_space_test.csv')) # Different titles; Substituted storage attributes print('Loaded all test files')
from create_data import create_data


def integerize_labels(data):
    """Map the label column of *data* to small integer codes.

    Takes a numpy matrix whose last column holds labels and returns a tuple
    ``(integerized_data, label_dict)``: a copy of the input with each label
    replaced by an integer assigned in first-seen row order, plus the
    label -> int mapping.
    """
    # Index of the last column (the labels); len(data.T) == column count.
    label_col = len(data.T) - 1
    # Assign consecutive integers to labels in first-seen row order.
    label_dict = {}
    for row in range(len(data)):
        label = data[row, label_col]
        if label not in label_dict:
            label_dict[label] = len(label_dict)
    # Work on a copy so the caller's array is not mutated (the original
    # aliased the input, contradicting its own docstring).
    integerized_data = data.copy()
    # Replace every occurrence of each label with its code (idea from
    # http://stackoverflow.com/questions/19666626).
    # NOTE: the boolean mask spans the whole array, so a matching value in
    # any column -- not just the label column -- is replaced too.
    for label, code in label_dict.items():
        integerized_data[integerized_data == label] = code
    return (integerized_data, label_dict)


if __name__ == '__main__':
    irisdata = create_data("iris")
    data, labeldict = integerize_labels(irisdata)
    print(data)
    print(labeldict)
hist_kws={"cumulative": True}, kde_kws={"cumulative": True}, ) plt.legend() plt.show() print("Diff ill:", ill_diff) print("Diff Control:", control_diff) numpy.savetxt("1-pca.csv", ill_diff, delimiter=",") numpy.savetxt("2-pca.csv", control_diff, delimiter=",") if __name__ == "__main__": data = cd.create_data() mean = cd.mean_diff(data) # Select an ill patient and a control patient and check the diff from mean ill = data[data["K760"] == 2] control = data[data["K760"] == 1] dataFrame = data[data["K760"] != 3] background = data[(data["K760"] == 3) | (data["D50*"] == 3)] background = background.values cpca(ill, control, dataFrame, background) # pca(ill, control, mean) # K-S Test
plt.xlabel("False Positive Rate") plt.ylabel("True Positive Rate") plt.title("Receiver Operating Characteristic (ROC) Curve") plt.legend() plt.show() def plot_confustion_matrix(con_matrix): class_names = [1, 2] # name of classes fig, ax = plt.subplots() tick_marks = np.arange(len(class_names)) plt.xticks(tick_marks, class_names) plt.yticks(tick_marks, class_names) # create heatmap sns.heatmap(pd.DataFrame(con_matrix), annot=True, cmap="YlGnBu", fmt="g") ax.xaxis.set_label_position("top") plt.tight_layout() plt.title("Confusion matrix", y=1.1) plt.ylabel("Actual label") plt.xlabel("Predicted label") plt.show() if __name__ == "__main__": df = cd.create_data() logistic_regression_cpca(df) logistic_regression_pca(df)
def test(corn_weights, scratch_corn_weights):
    # Evaluate the two trained nets (ImageNet-initialized vs trained from
    # scratch) and report their accuracies plus the wall-clock time taken.
    # NOTE: this file is Python 2 (print statements, raw_input below).
    start = time.time()
    test_net, accuracy = eval_corn_net(corn_weights)
    print 'Accuracy, trained from ImageNet initialization: %3.1f%%' % (
        100 * accuracy, )
    scratch_test_net, scratch_accuracy = eval_corn_net(scratch_corn_weights)
    print 'Accuracy, trained from random initialization: %3.1f%%' % (
        100 * scratch_accuracy, )
    end = time.time()
    print 'The stage of testing use time: %.2fs' % (end - start)


if __name__ == '__main__':
    # set caffe
    caffe.set_mode_cpu()
    # caffe.set_mode_gpu()
    # caffe.set_device(0)
    # Build the dataset from a user-supplied image directory, fine-tune from
    # the reference CaffeNet weights, then evaluate both resulting nets.
    data_path = raw_input("Please input image path: ")
    create_data(data_path)
    weights = 'model/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
    niter = raw_input("Please input iterations in training: ")
    corn_weights, scratch_corn_weights = train(int(niter), weights)
    test(corn_weights, scratch_corn_weights)
from create_initial_matrix import create_initial_matrix
from copying import turing_pattern
from create_data import create_data
from optimising import optimise
from MCMC import MCMC_main, MCMC_save_plot
from turing_class import NumericalSolver

# End-to-end Turing-pattern pipeline: simulate a clean pattern, add noise,
# then recover the reaction parameters (a, b) by a coarse grid search
# followed by MCMC refinement.
a = NumericalSolver()
a.plot_fig()
size = 100  # size of matrix dimensions
time_lst = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]  # times that we might want to solve for
create_initial_matrix(size)  # create initial matrix
turing_pattern(a=2.8e-4, b=5e-3, tau=2, k=0)  # making clean data
create_data(time_lst)  # making noisy data
a_est, b_est = optimise(time=1.0, a_initial=0.0, a_final=1e-3,
                        b_initial=0.0, b_final=1e-2,
                        iters=10)  # rough parameter recovery of a and b
res, final_guess = MCMC_main(a_init=a_est, b_init=b_est, a_max=1e-3,
                             b_max=1e-2, iters=1000)  # this is quite slow
MCMC_save_plot(res)
for i_method in methods_to_run: method_t = methods_opt[i_method] args.model_type = method_t[1] args.estimation_type = method_t[2] for i_samp_num, samp_num in enumerate(num_train_samp_grid): args.n_train = samp_num for i_rep in range(n_rep): print('Method {} out of {}'.format(i_method + 1, n_methods)) print('Sample size {} out of {}'.format( i_samp_num + 1, n_samp_grid)) print('Replication {} out of {}'.format(i_rep + 1, n_rep)) # Create Data train_set, test_set = create_data(args) # Learning err_mat[i_method, i_samp_num, i_rep] = learn_latent_model(args, train_set, test_set) # save temp results file pickle.dump( { 'general_args': general_args, 'methods_opt': methods_opt, 'err_mat': err_mat, 'num_train_samp_grid': num_train_samp_grid }, open(save_file + '_Temp_{}'.format(i_method), "wb")) pickle.dump( {
def data(symbol):
    """Return data for *symbol* covering the trailing five-hour window
    ending at the current local time.

    NOTE(review): the original local was named ``last_hour`` but the window
    is 5 hours wide -- confirm which is intended.
    """
    window_start = (datetime.now() - timedelta(hours=5)).replace(microsecond=0)
    window_end = datetime.now().replace(microsecond=0)
    return create_data.create_data(symbol, window_start, window_end)
# Interactive console menu (Russian UI) for collecting statistics about
# other anime communities via the VK API token from settings.
TOKEN = settings['TOKEN']

if __name__ == '__main__':
    print(
        'Программа-скрипт для сбора информации о других аниме-пабликах. Команды:'
    )
    print('1. create_data - создание первичной базы данных')
    print('2. create_table - создание удобной таблицы первичной базы данных')
    print('3. pubs_info - подробная информация о каждом паблике')
    print('4. Нажмите клавишу Enter для выхода')
    # Loop until the user presses Enter on an empty prompt; unrecognized
    # commands simply re-prompt.
    while 1:
        command = input('Введите команду: ')
        if command == 'create_data':
            create_data.create_data(PUBS, TOKEN)
            print('\nПервичная база данных создана')
        elif command == 'create_table':
            # Close the JSON handle once the table is built; the original
            # leaked it.
            with codecs.open(os.path.join(r'.\pub_info', 'data.json'), 'r',
                             encoding='utf-8') as DATA:
                create_table.create_table(DATA)
            print('\nПервичная таблица создана')
        elif command == 'pubs_info':
            # Same leak fix as above. Assumes pubs_info reads DATA eagerly
            # -- TODO confirm it does not hold the handle after returning.
            with codecs.open(os.path.join(r'.\pub_info', 'data.json'), 'r',
                             encoding='utf-8') as DATA:
                pubs_info.pubs_info(TOKEN, DATA)
            print('\nПодробная статистика пабликов создана')
        elif command == '':
            sys.exit()