def main():
    train_X, train_Y, test_X, test_Y = load_2D_dataset(False)
    np.random.seed(1)
    parameters = initialize_parameters_random([train_X.shape[0], 15, 10, 1])

    # Baseline: train without regularization.
    print('Training without Regularization..')
    trained_weights = train(train_X, train_Y, parameters,
                            iterations=30000, learning_rate=0.5)
    print('Prediction on Train set:')
    predict(train_X, train_Y, trained_weights)
    print('Prediction on Dev set:')
    predict(test_X, test_Y, trained_weights)
    plot_decision_boundary(lambda x: predict_dec(trained_weights, x.T),
                           train_X, train_Y)

    # Sweep the L2 regularization strength, re-initializing the weights
    # before each run so every setting starts from the same point.
    for l in [0.01, 0.03, 0.1, 0.3, 1, 3, 10]:
        print('\nTraining with L2 Regularization (lambda = {})'.format(l))
        parameters = initialize_parameters_random([train_X.shape[0], 15, 10, 1])
        trained_weights = train(train_X, train_Y, parameters,
                                iterations=30000, learning_rate=0.3,
                                lambd=l, print_cost=True)
        predict(train_X, train_Y, trained_weights)
        predict(test_X, test_Y, trained_weights)
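# The lambd argument above is the L2 regularization strength. For
# reference, such a penalty is usually added to the cross-entropy cost
# as (lambd / 2m) times the sum of squared weights. A minimal sketch of
# that term, assuming parameters stores the weight matrices under keys
# 'W1', 'W2', ... as initialize_parameters_random suggests (the actual
# train implementation is not shown here):
import numpy as np

def l2_penalty(parameters, lambd, m):
    # Sum the squared entries of every weight matrix, then scale by
    # lambda / (2m), where m is the number of training examples.
    num_layers = len(parameters) // 2  # each layer has a W and a b
    squared = sum(np.sum(np.square(parameters['W' + str(l)]))
                  for l in range(1, num_layers + 1))
    return (lambd / (2 * m)) * squared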
def predict_data(model_path: str, data_path: str, output_path: str):
    # Load the pickled model and score a new CSV with the same
    # preprocessing used at training time.
    with open(model_path, 'rb') as f:
        model = pickle.load(f)
    df = pd.read_csv(data_path)
    X, y = preprocess.preprocess(df)
    predictions = predict.predict(model, X)
    pd.DataFrame(predictions).to_csv(output_path, index=False)
    print("Predictions Done")
    print("Output file saved in: %s" % output_path)
def train_model(model_path: str, data_path: str, prov: str = 'Córdoba'):
    # Train a random forest on one province, report hold-out metrics,
    # and persist the fitted model.
    df = pd.read_csv(data_path)
    X, y = preprocess.preprocess(df, prov)
    model, X_test, y_test = train.train_randomforest(X, y)
    predictions = predict.predict(model, X_test)
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)
    print("All Done")
    print("Metrics - MSE: {} R2: {}".format(mse, r2))
    print("Model Saved in: %s" % model_path)
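# A minimal sketch of how the two helpers above could be chained for a
# train-then-score run; the file paths are illustrative placeholders,
# not values taken from the project:
if __name__ == '__main__':
    train_model('model.pkl', 'data/train.csv', prov='Córdoba')
    predict_data('model.pkl', 'data/new.csv', 'predictions.csv')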
def main(path: str):
    # Collect the raw CSV files matching the glob pattern and load the
    # first match.
    raw_files = glob.glob(path)
    df = pd.read_csv(raw_files[0])
    X, y = preprocess.preprocess(df)
    model, X_test, y_test = train.train_randomforest(X, y)
    predictions = predict.predict(model, X_test)
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    print("All Done")
    print("Metrics - MSE: {} R2: {}".format(mse, r2))
    # Line up predictions and ground truth side by side and export them.
    preds = pd.DataFrame(predictions).reset_index(drop=True)
    real = pd.DataFrame(y_test).reset_index(drop=True)
    output = pd.concat([preds, real], axis=1, ignore_index=True)
    output.columns = ['preds', 'real']
    output.to_csv('output.csv', index=False)
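# train.train_randomforest is project code that isn't shown in this
# file. A minimal sketch of what such a helper could look like with
# scikit-learn, assuming a plain 80/20 hold-out split (the split ratio,
# estimator settings, and random_state are illustrative assumptions):
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

def train_randomforest(X, y):
    # Hold out 20% of the rows for evaluation, fit a default-sized
    # forest, and hand back the model together with the hold-out split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    return model, X_test, y_test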
def main():
    loss_norm = []
    loss_difference = []
    countries = [
        'Afghanistan', 'Indien', 'Irak', 'Kolumbien', 'Pakistan',
        'Philippinen', 'sandbox_attacks', 'test_exp_chirp'
    ]
    for country in countries:
        data_name = country
        data_dir = '../../' + data_name + '.csv'
        train_data_scaled, train_date, test_data, test_date, std = prepare_data(
            data_dir, normalize=True, scaling='minmax')

        # Model hyperparameters.
        model_dir = './model/'
        num_epoch = 1000
        n_steps = 100
        n_inputs = 1
        n_neurons = 30
        n_layers = 1

        print(country)
        sess, train_loss, epoch_count = training(train_data_scaled, model_dir,
                                                 num_epoch=num_epoch,
                                                 n_steps=n_steps,
                                                 n_inputs=n_inputs,
                                                 n_neurons=n_neurons,
                                                 n_layers=n_layers)
        print()
        prediction, true_labels, label_dates = predict(test_data, test_date,
                                                       sess, std, model_dir,
                                                       n_steps, n_inputs)
        # rescaled_prediction = std.inverse_transform(prediction.reshape(-1, 1))
        # rescaled_labels = std.inverse_transform(true_labels.reshape(-1, 1))
        loss_norm.append(normed_loss(prediction, true_labels))
        loss_difference.append(differential_loss(prediction, true_labels))

        # Plot the training curve and the prediction against the labels.
        plt.figure()
        plt.subplot(211)
        plt.plot(epoch_count, train_loss)
        plt.title('Training loss')
        plt.xlabel('Epoch')
        plt.ylabel('Training MSE')
        plt.subplot(212)
        plt.plot_date(label_dates, true_labels, xdate=True, label='Labels', ls="-")
        plt.plot_date(label_dates, prediction, xdate=True, label='Predictions', ls="-")
        plt.xticks(rotation="vertical")
        plt.title('Prediction')
        plt.legend()
        plt.xlabel('Days')
        plt.ylabel('Attack')
        save_fig('predicted value feedback' + data_name, './Images/')

    # Persist the per-country losses for comparison across runs.
    loss_dict = {
        'Countries': countries,
        'Normed_loss': loss_norm,
        'Differential_loss': loss_difference
    }
    pd.DataFrame(loss_dict).to_csv('./RNN_loss.csv')
from modules import preprocess, train, predict
import glob
import pandas as pd

if __name__ == '__main__':
    # Pick up the first raw gzip-compressed CSV.
    raw_files = glob.glob('../data/raw/*.csv.gz')
    path = raw_files[0]
    df = pd.read_csv(path)
    df = preprocess.preprocess(df)
    model, X_test, y_test = train.train_randomforest(df)
    predictions, mse, r2 = predict.predict(model, X_test, y_test)
    print("All Done")
    print("Metrics - MSE: {} R2: {}".format(mse, r2))
    # Write predictions next to the ground truth.
    preds = pd.DataFrame(predictions).reset_index(drop=True)
    real = pd.DataFrame(y_test).reset_index(drop=True)
    output = pd.concat([preds, real], axis=1, ignore_index=True)
    output.columns = ['preds', 'real']
    output.to_csv('output.csv', index=False)
def polish_genome(assembly, model_path, sketch_path, genus, threads,
                  output_dir, minimap_args, mash_threshold,
                  download_contig_nums, debug):
    out = []
    output_dir = FileManager.handle_output_directory(output_dir)
    contig_output_dir_debug = output_dir + '/debug'
    contig_output_dir_debug = FileManager.handle_output_directory(
        contig_output_dir_debug)
    assembly_name = assembly.rsplit('/', 1)[-1]
    assembly_name = assembly_name.split('.')[0]
    total_start_time = time.time()

    # Polish each contig independently, then concatenate the results.
    for contig in SeqIO.parse(assembly, 'fasta'):
        timestr = time.strftime("[%Y/%m/%d %H:%M]")
        sys.stderr.write(TextColor.GREEN + str(timestr) + " INFO: RUN-ID: "
                         + contig.id + "\n" + TextColor.END)
        contig_output_dir = contig_output_dir_debug + '/' + contig.id
        contig_output_dir = FileManager.handle_output_directory(
            contig_output_dir)
        contig_name = contig_output_dir + '/' + contig.id + '.fasta'
        SeqIO.write(contig, contig_name, "fasta")

        if sketch_path:
            screen_start_time = time.time()
            print_system_log('MASH SCREEN')
            mash_file = mash.screen(contig_name, sketch_path, threads,
                                    contig_output_dir, mash_threshold,
                                    download_contig_nums, contig.id)
            screen_end_time = time.time()
            ncbi_id = mash.get_ncbi_id(mash_file)
            # Skip polishing if fewer than 5 closely related genomes
            # were found.
            if len(ncbi_id) < 5:
                out.append(contig_name)
                continue
            url_list = download.parser_url(ncbi_id)

        if genus:
            ncbi_id, url_list = download.parser_genus(genus)

        download_start_time = time.time()
        print_system_log('DOWNLOAD CONTIGS')
        db = download.download(contig_output_dir, ncbi_id, url_list)
        download_end_time = time.time()

        pileup_start_time = time.time()
        print("\n")
        print_system_log('PILE UP')
        db_npz = alignment.align(contig_name, minimap_args, threads, db,
                                 contig_output_dir)
        if db_npz is False:
            continue
        pileup_end_time = time.time()

        align2df_start_time = time.time()
        print_system_log('TO DATAFRAME')
        df = align2df.todf(contig_name, db_npz, contig_output_dir)
        align2df_end_time = time.time()

        predict_start_time = time.time()
        print_system_log('PREDICT')
        df = contig_output_dir + '/' + contig.id + '.feather'
        result = predict.predict(df, model_path, threads, contig_output_dir)
        predict_end_time = time.time()

        polish_start_time = time.time()
        print_system_log('POLISH')
        finish = polish.stitch(contig_name, result, contig_output_dir)
        polish_end_time = time.time()

        if sketch_path:
            screen_time = get_elapsed_time_string(screen_start_time,
                                                  screen_end_time)
            print_stage_time('SCREEN', screen_time)

        # Calculate per-stage elapsed times.
        download_time = get_elapsed_time_string(download_start_time,
                                                download_end_time)
        pileup_time = get_elapsed_time_string(pileup_start_time,
                                              pileup_end_time)
        align2df_time = get_elapsed_time_string(align2df_start_time,
                                                align2df_end_time)
        predict_time = get_elapsed_time_string(predict_start_time,
                                               predict_end_time)
        polish_time = get_elapsed_time_string(polish_start_time,
                                              polish_end_time)

        # Print per-stage times.
        print_stage_time('DOWNLOAD', download_time)
        print_stage_time('PILEUP', pileup_time)
        print_stage_time('TO DATAFRAME', align2df_time)
        print_stage_time('PREDICT', predict_time)
        print_stage_time('POLISH', polish_time)
        out.append(finish)

    # Concatenate all polished contigs into a single FASTA.
    os.system('cat {} > {}/{}_homopolished.fasta'.format(
        ' '.join(out), output_dir, assembly_name))

    if debug:
        try:
            shutil.rmtree(contig_output_dir_debug)
        except OSError as e:
            print(e)
        else:
            return True

    total_end_time = time.time()
    total_time = get_elapsed_time_string(total_start_time, total_end_time)
    print_stage_time('Total', total_time)
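# A hedged example of invoking the polishing pipeline above; every
# argument value below is an illustrative placeholder, not a documented
# default:
polish_genome(assembly='draft_assembly.fasta',
              model_path='model.pkl',
              sketch_path='refseq_sketch.msh',
              genus=None,
              threads=8,
              output_dir='polished_out',
              minimap_args='asm5',
              mash_threshold=0.95,
              download_contig_nums=20,
              debug=False)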
def main():
    print(os.listdir("./platesv2/"))
    # Any results you write to the current directory are saved as output.
    data_root = './platesv2/plates/'
    print(os.listdir(data_root))

    # Create "train"/"val" directories with one subfolder per class.
    train_dir = 'train'
    val_dir = 'val'
    class_names = ['cleaned', 'dirty']
    for dir_name in [train_dir, val_dir]:
        for class_name in class_names:
            os.makedirs(os.path.join(dir_name, class_name), exist_ok=True)

    # Move every 6th photo from "train_dir" to "val_dir" for validation.
    create_val(data_root, train_dir, val_dir, class_names, 6)

    # Apply augmentations and form datasets for training and validation.
    train_dataset, val_dataset = form_dataset(test=False)

    # We will feed the net with data in the form of batches.
    batch_size = 8
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=batch_size)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=batch_size)

    # Have a look at the first batch.
    X_batch, y_batch = next(iter(train_dataloader))
    for x_item, y_item in zip(X_batch, y_batch):
        show_input(x_item, title=class_names[y_item])

    # Model: ResNet18 pretrained on ImageNet.
    model = models.resnet18(pretrained=True)

    # Freeze all conv layers; only the new head will be trained.
    for param in model.parameters():
        param.requires_grad = False

    print("Output of ResNet18 before FC layer, that we add later: ",
          model.fc.in_features)

    # Replace the FC layer with one that has 2 outputs: cleaned or dirty.
    model.fc = torch.nn.Linear(model.fc.in_features, 2)

    # Put the model on the GPU if one is available.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Loss function: cross-entropy over the two classes.
    loss = torch.nn.CrossEntropyLoss()

    # Optimization method: Adam with AMSGrad.
    optimizer = torch.optim.Adam(model.parameters(), amsgrad=True, lr=1.0e-3)

    # Decay the learning rate by a factor of 0.1 every 7 epochs.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7,
                                                gamma=0.1)

    # Training.
    print("Begin training: ")
    train(model, train_dataloader, val_dataloader, loss, optimizer,
          scheduler, device, num_epochs=10)

    # Now let's make predictions.
    test_dir = 'test'
    # Add a dummy class directory so ImageFolder can enumerate the test
    # pictures and create an iterator over them.
    shutil.copytree(os.path.join(data_root, 'test'),
                    os.path.join(test_dir, 'unknown'))
    test_dataset = form_dataset(test=True)

    # Form batches on the test set.
    test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=batch_size)
    model.eval()

    # Make predictions.
    test_predictions, test_img_paths = predict(test_dataloader, model, device)

    # Show predictions for the test inputs.
    inputs, labels, paths = next(iter(test_dataloader))
    for img, pred in zip(inputs, test_predictions):
        show_input(img, title=pred)

    # Submit.
    print("Making submission")
    make_submit(test_img_paths, test_predictions)
arguments = docopt(__doc__, version='opensoundscape.py version 0.0.1')

# Get the default config variables.
config = generate_config("config/opensoundscape.ini", arguments["--ini"])

# Initialize an empty string for arguments['<image>'] (default values
# for positionals are not supported by docopt).
if not arguments['<image>']:
    arguments['<image>'] = ''

if arguments['spect_gen']:
    # Pass the configuration to spect_gen.
    spect_gen(config)
elif arguments['view']:
    # Preprocess the file with the defaults,
    # optionally writing the image to a file.
    view(arguments['<label>'], arguments['<image>'],
         arguments['--segments'], config)
elif arguments['model_fit']:
    # Using the defined algorithm, create a model.
    model_fit(config, arguments['--rerun_statistics'])
elif arguments['predict']:
    # Make a prediction based on a model.
    predict(config)
elif arguments['init']:
    # Initialize the INI section.
    init(config)
def process_item(self, item, spider):
    # Score the scraped item and return the enriched dict so that
    # downstream pipelines receive the sentiment fields.
    sentiment_dictionary = predict.predict(item)
    return sentiment_dictionary
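# For this process_item hook to run, the enclosing pipeline class has
# to be registered in the project's Scrapy settings. A minimal sketch,
# assuming the class is named SentimentPipeline and lives in
# myproject/pipelines.py (both names are hypothetical):
#
# settings.py
ITEM_PIPELINES = {
    'myproject.pipelines.SentimentPipeline': 300,  # lower number runs earlier
}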
print(" * (8) for Bar plot visualization of Clusters") print(" * (9) for Pie plot visualization of Clusters") choice = int(input("Enter action: ")) if choice == 1: # Previewing the records - Plots a basic view preview.preview(csv_file) elif choice == 2: # Training data train() elif choice == 3: # Exiting Program break elif choice == 4: # Predict Cluster of Student predict.predict(CLUSTERS) elif choice == 5: # Plots more detailed preview preview.rich_preview(CLUSTERS) elif choice == 6: # Naming Clusters identify.name_clusters(CLUSTERS, NAMED.get()) NAMED.set(True) elif choice == 7: # Plots a basic View pt.view(CLUSTERS) elif choice == 8: # Plots a bar graph representation preview.bar_plot(CLUSTERS) elif choice == 9: # Plots a pie chart representation