def __init__(self, cfg):
    self.cfg = cfg
    self.nEpochs = cfg['nEpochs']
    self.checkpoint_dir = cfg['checkpoint']
    self.epoch = 1
    self.timestamp = int(time.time())
    # Only use worker processes for data loading in GPU mode; otherwise
    # load in the main process.
    if cfg['gpu_mode']:
        self.num_workers = cfg['threads']
    else:
        self.num_workers = 0
    # Note: 'upsacle' is the key spelling this config uses throughout.
    self.train_dataset = get_data(cfg, cfg['train_dataset'],
                                  cfg['data']['upsacle'])
    self.train_loader = DataLoader(self.train_dataset,
                                   cfg['data']['batch_size'],
                                   shuffle=True,
                                   num_workers=self.num_workers)
    self.val_dataset = get_data(cfg, cfg['valid_dataset'],
                                cfg['data']['upsacle'])
    self.val_loader = DataLoader(self.val_dataset,
                                 cfg['data']['batch_size'],
                                 shuffle=False,
                                 num_workers=self.num_workers)
    self.records = {'Epoch': [], 'PSNR': [], 'SSIM': [], 'Loss': []}
    if not os.path.exists(self.checkpoint_dir):
        os.makedirs(self.checkpoint_dir)
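# For reference, a sketch of the cfg dict this constructor expects. The key
# names are taken from the code above; the values are illustrative assumptions
# only (note that 'upsacle' is the spelling the config actually uses).
example_cfg = {
    'nEpochs': 100,
    'checkpoint': './checkpoints',
    'gpu_mode': True,
    'threads': 4,
    'train_dataset': 'data/train',
    'valid_dataset': 'data/valid',
    'data': {'upsacle': 4, 'batch_size': 16},
}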
def train(**kwargs):
    # Override default options with any keyword arguments passed in
    for k, v in kwargs.items():
        setattr(opt, k, v)
    if opt.vis:
        visualizer = Visualizer()

    # Data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = DataLoader(data, batch_size=opt.batch_size, shuffle=True)

    # Model
    model = LyricsModel(len(word2ix), opt.embedding_dim, opt.latent_dim)
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path, map_location="cpu"))

    # Define optimizer and loss
    optimizer = Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    loss_meter = meter.AverageValueMeter()

    if opt.use_gpu:
        model.cuda()
        criterion.cuda()

    # ================================================ #
    #                 Start Training                   #
    # ================================================ #
    for epoch in tqdm.tqdm(range(opt.num_epoch)):
        for ii, data_ in enumerate(dataloader):
            # Prepare data: (batch, seq) -> (seq, batch); shift by one token
            # so the model predicts the next word at every position.
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.use_gpu:
                data_ = data_.cuda()
            inputs, targets = Variable(data_[:-1, :]), Variable(data_[1:, :])
            outputs, hidden = model(inputs)

            # Zero gradients, then backpropagate
            optimizer.zero_grad()
            loss = criterion(outputs, targets.view(-1))
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())

            if (ii + 1) % opt.print_every == 0:
                # was "%d", which truncates fractional losses to an integer
                print("Current Loss: %.4f" % loss.item())
                if opt.vis:
                    visualizer.plot('loss', loss_meter.value()[0])

        if (epoch + 1) % 20 == 0:
            t.save(model.state_dict(), 'checkpoints/%s.pth' % epoch)
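# Hypothetical invocation (not from the source): train() reads its settings
# from a module-level `opt` object and overrides them via **kwargs, so a run
# might look like this, assuming opt defines these attributes with defaults.
if __name__ == '__main__':
    train(batch_size=128, lr=1e-3, num_epoch=20, use_gpu=False, vis=False)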
def egg(key):
    data = get_data()
    if key in data:
        datum = data[key]
        egg_no = datum['egg_number']
        hints = shuffle_hints(datum['hints'])
        return render_template('egg.html', egg_no=egg_no, hints=hints)
    else:
        return render_template('404.html', title='404'), 404
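# shuffle_hints() is referenced above but not shown; a plausible implementation
# (an assumption, not the source's) returns the hints in random order without
# mutating the stored data.
import random

def shuffle_hints(hints):
    hints = list(hints)    # copy first, so the cached data stays untouched
    random.shuffle(hints)  # in-place shuffle of the copy
    return hints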
def generate(**kwargs):
    # Override default options with any keyword arguments passed in
    for k, v in kwargs.items():
        setattr(opt, k, v)

    # Data
    data, word2ix, ix2word = get_data(opt)

    # Load model
    model = LyricsModel(len(word2ix), opt.embedding_dim, opt.latent_dim)
    if opt.model_path:
        model.load_state_dict(
            t.load(opt.model_path, map_location=lambda s, l: s))
    if opt.use_gpu:
        model.cuda()

    def to_input(ix):
        # Wrap a word index as a (1, 1) LongTensor on the right device; the
        # original rebuilt every input on the CPU, which breaks under use_gpu.
        v = Variable(t.LongTensor([ix])).view(1, 1)
        return v.cuda() if opt.use_gpu else v

    # ================================================ #
    #                 Start Decoding                   #
    # ================================================ #
    results = list(opt.start_words)
    input_ = to_input(word2ix["<START>"])
    hidden = None

    # Warm up the hidden state on the prefix words, if any
    if opt.prefix_words:
        for w in opt.prefix_words:
            output, hidden = model(input_, hidden)
            input_ = to_input(word2ix[w])

    for i in range(opt.max_gen_len):
        output, hidden = model(input_, hidden)
        if i < len(opt.start_words):
            # Feed the user-provided start words first
            word = opt.start_words[i]
            input_ = to_input(word2ix[word])
        else:
            # Greedy decoding: take the highest-scoring next word
            top_index = output.data[0].topk(1)[1].item()
            word = ix2word[top_index]
            results.append(word)
            input_ = to_input(top_index)
        if word == '<EOS>':
            break

    if "<EOS>" in results:
        results.remove("<EOS>")
    print(''.join(results).rstrip())
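# Hypothetical invocation (not from the source): like train(), generate()
# reads the module-level `opt`; start_words seeds the output while
# prefix_words only conditions the hidden state. Values are assumptions.
if __name__ == '__main__':
    generate(start_words='hello', prefix_words=None,
             max_gen_len=100, use_gpu=False)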
def post(self):
    list_of_names = [
        'exons', 'cpg', 'fStomach-DS17659', 'fSkin_fibro_bicep_R-DS19745',
        'fKidney_renal_cortex_L-DS17550', 'fLung_R-DS15632'
    ]
    list_of_bedtools = get_data()
    data = request.json
    firstGene = data['firstGene']
    secondGene = data['secondGene']
    indexOfFirstGene = 0
    indexOfSecondGene = 0
    for i in range(len(list_of_names)):
        if firstGene == list_of_names[i]:
            indexOfFirstGene = i
        elif secondGene == list_of_names[i]:
            indexOfSecondGene = i
    # Shuffle the second track across hg19 (keeping features on their
    # chromosomes) to build a randomized baseline, then measure overlap.
    random_shuffle_second_gene = list_of_bedtools[indexOfSecondGene].shuffle(
        genome='hg19', chrom=True)
    jaccard = list_of_bedtools[indexOfFirstGene].jaccard(
        random_shuffle_second_gene.sort())
    return jaccard
def __init__(self, cfg, name):
    super(Solver, self).__init__(cfg)
    self.init_epoch = self.cfg['schedule']

    # Dynamically import the network module named by the config
    net_name = self.cfg['algorithm'].lower()
    lib = importlib.import_module('model.' + net_name)
    net = lib.Net
    self.model = net(num_channels=self.cfg['data']['n_colors'],
                     base_filter=64,
                     scale_factor=self.cfg['data']['upsacle'],
                     args=self.cfg)

    self.train_dataset = get_data(
        self.cfg,
        str(self.cfg['train_dataset']) + '/' + str(name) + '.png',
        str(self.cfg['train_dataset']) + '/' + str(name) + '.png',
        self.cfg['data']['upsacle'])
    self.train_loader = DataLoader(self.train_dataset,
                                   self.cfg['data']['batch_size'],
                                   shuffle=False,
                                   num_workers=self.num_workers)

    # Cache the single training sample; batch[3] is unused, as in the
    # original indexing.
    for iteration, batch in enumerate(self.train_loader, 1):
        (lr, hr, bic, hr_ref, bic_ref, file_name) = (
            Variable(batch[0]), Variable(batch[1]), Variable(batch[2]),
            Variable(batch[4]), Variable(batch[5]), batch[6])
        self.hr_ref = hr_ref
        self.lr = lr
        self.file_name = file_name

    self.noise_init = get_noise(
        32, 'noise',
        (self.cfg['data']['patch_size'] * self.cfg['data']['upsacle'],
         self.cfg['data']['patch_size'] * self.cfg['data']['upsacle']))
    self.noise = self.noise_init.detach().clone()

    self.optimizer = maek_optimizer(self.cfg['schedule']['optimizer'], cfg,
                                    self.model.parameters())
    self.loss = CycleLoss(scale=1 / 4, loss_type='MSE')
    self.log_name = self.cfg['algorithm'] + '_' + str(
        self.cfg['data']['upsacle']) + '_' + str(self.timestamp)

    # Save log
    self.writer = SummaryWriter('log/' + str(self.log_name))
    save_net_config(self.log_name, self.model)
    save_yml(cfg, os.path.join('log/' + str(self.log_name), 'config.yml'))
def main():
    """
    Main execution function to train machine learning models to predict
    pointsWon in Cy Young races.
    """
    create_directories([
        DIAGNOSTICS_DIRECTORY, MODELS_DIRECTORY, PLOTS_DIRECTORY,
        TEST_SET_DIRECTORY, SHAP_VALUES_DIRECTORY
    ], parent=DIAGNOSTICS_DIRECTORY)
    df = get_data()
    make_diagnostic_plots(df)
    x_train, y_train, x_test, y_test = create_custom_train_test_split(
        df, TARGET, 0.2, INDIVIDUAL_ID)
    custom_cv = create_custom_cv(x_train, INDIVIDUAL_ID, CV_SPLITS)
    for key, value in MODEL_TRAINING_DICT.items():
        train_model(x_train, y_train, x_test, y_test, key, construct_pipeline,
                    value[0], value[1], custom_cv, value[2])
def post(self):
    list_of_names = [
        'exons', 'cpg', 'fStomach-DS17659', 'fSkin_fibro_bicep_R-DS19745',
        'fKidney_renal_cortex_L-DS17550', 'fLung_R-DS15632'
    ]
    list_of_bedtools = get_data()
    data = request.json
    firstGene = data['firstGene']
    secondGene = data['secondGene']
    indexOfFirstGene = 0
    indexOfSecondGene = 0
    for i in range(len(list_of_names)):
        if firstGene == list_of_names[i]:
            indexOfFirstGene = i
        elif secondGene == list_of_names[i]:
            indexOfSecondGene = i
    intersect = list_of_bedtools[indexOfFirstGene].jaccard(
        list_of_bedtools[indexOfSecondGene])
    return intersect
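# A minimal client sketch (an assumption, not part of the source): POST the
# two track names as JSON to wherever this resource is mounted, e.g. '/jaccard'.
import requests

resp = requests.post('http://localhost:5000/jaccard',
                     json={'firstGene': 'exons', 'secondGene': 'cpg'})
print(resp.json())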
def main(args):
    all_X, all_Y, column_names = data.get_data(get_feature_names=True)
    input_dim = len(all_X[0])

    # Load the models
    nn = models.get_nn_model(input_dim)
    rf = models.get_random_forest_model()

    # Create training set
    n_train = 1000
    X = all_X[:n_train]
    Y = all_Y[:n_train]

    # Fit the model
    nn.fit(X, Y, epochs=50, batch_size=10, verbose=2)

    # Get some samples to test our model on: our test set
    test_set_range = (10000, 20000)
    x_test = all_X[test_set_range[0]:test_set_range[1]]
    y_test = all_Y[test_set_range[0]:test_set_range[1]]

    # Calculate predictions
    nn_predictions = nn.predict(x_test)

    # Threshold to 0/1 predictions and get accuracy; accuracy_score returns
    # a fraction, so scale by 100 before printing it as a percentage.
    nn_predictions = [int(round(x)) if not math.isnan(x) else 0
                      for x in nn_predictions]
    print("\nNeural Network Accuracy: %.4f%%" %
          (100 * accuracy_score(y_test, nn_predictions)))

    rf = rf.fit(X, Y.reshape(n_train, ))
    rf_predictions = rf.predict(x_test)
    print("Random Forest Accuracy: %.4f%%" %
          (100 * accuracy_score(y_test, rf_predictions)))

    # Feature importance as found by the random forest model
    if args.feature_importance:
        print("\nRandom Forest Feature importance:")
        for i in zip(column_names, rf.feature_importances_):
            print("%s importance: %.2f" % i)
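# A vectorized alternative (a sketch, not the source's approach) to the
# per-element thresholding above: numpy maps NaN to 0, rounds, and casts to
# int in one pass.
import numpy as np

nn_predictions = np.nan_to_num(nn_predictions).round().astype(int)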
path_check = '{}/check/checkpoint.pt'.format(eval_args.model)
torch.manual_seed(eval_args.seed)

###############
## Load args ##
###############

with open(path_args, 'rb') as f:
    args = pickle.load(f)

##################
## Specify data ##
##################

_, _, data_shape = get_data(args)

###################
## Specify model ##
###################

model = get_model(args, data_shape=data_shape)
if args.parallel == 'dp':
    model = DataParallelDistribution(model)
checkpoint = torch.load(path_check)
model.load_state_dict(checkpoint['model'])
print('Loaded weights for model at {}/{} epochs'.format(
    checkpoint['current_epoch'], args.epochs))

############
## Sample ##
def dataConstructor(data=None, w=None):
    """Add the weather, holiday and daylight-hour variables to the bicing
    dataframe from get_data().

    Parameters
    ----------
    data : pandas.DataFrame
        data from function get_data()
    w : pandas.DataFrame
        weather data from function get_weather()

    Returns
    -------
    X : pandas.DataFrame
        data with new columns
    """
    # Avoid calling get_data()/get_weather() at import time, which the
    # original default arguments did.
    if data is None:
        data = get_data()
    if w is None:
        w = get_weather()

    data = add_holidays(data)
    data = add_daylight_hrs(data)
    data = add_weather(data, w)

    smallDataFrames = False
    if smallDataFrames:
        # https://www.reddit.com/r/learnpython/comments/5err0o/memoryerror_merging_two_dataframes_with_pandas/
        # A MemoryError when merging two dataframes with pandas would occur in
        # the line: X = X.join(pd.get_dummies(data[k])). Therefore, skip indices.
        data["day_of_week"] = data.index.day_name()
        data["hour"] = data.index.hour
    else:
        data.reset_index(level=0, inplace=True)
        data["day_of_week"] = data.loc[:, "updateTime"].dt.day_name()
        data["hour"] = data.loc[:, "updateTime"].dt.hour

    try:
        data.drop(columns=["updateTime"], inplace=True)
    except Exception as err:
        print(f"\tError: {err}" + "\n" + 80 * "~")

    boolVars = ["status", "type"]
    dummyVars = ["day_of_week"]
    X = data.drop(columns=[
        *boolVars,
        *dummyVars,
        "latitude",
        "longitude",
        "nearbyStations",
        "streetName",
        "streetNumber",
    ])
    # Encode booleans as integers and expand categoricals into dummy columns
    for k in boolVars:
        X[k] = LabelEncoder().fit_transform(data[k].values)
    for k in dummyVars:
        X = X.join(pd.get_dummies(data[k]))

    if 0:
        # Disabled: rename the dummy weekday columns to Day1..Day7
        X.rename(
            columns={
                "Monday": "Day1",
                "Tuesday": "Day2",
                "Wednesday": "Day3",
                "Thursday": "Day4",
                "Friday": "Day5",
                "Saturday": "Day6",
                "Sunday": "Day7",
            },
            inplace=True,
        )
    return X
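# Hypothetical usage (argument names taken from the signature above): build
# the feature matrix from freshly pulled bicing and weather data.
X = dataConstructor(data=get_data(), w=get_weather())
print(X.dtypes)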
###########
## Setup ##
###########

parser = argparse.ArgumentParser()
add_exp_args(parser)
add_data_args(parser)
add_model_args(parser)
add_optim_args(parser)
args = parser.parse_args()
set_seeds(args.seed)

##################
## Specify data ##
##################

train_loader, eval_loader = get_data(args)
data_id = get_data_id(args)

###################
## Specify model ##
###################

model = get_model(args)
model_id = get_model_id(args)

#######################
## Specify optimizer ##
#######################

optimizer, scheduler_iter, scheduler_epoch = get_optim(args, model.parameters())
# Adjust args
args.name = time.strftime("%Y-%m-%d_%H-%M-%S")
args.epochs = more_args.new_epochs
args.lr = more_args.new_lr
args.resume = None

# Store more_args
args.start_model = more_args.model
args.new_epochs = more_args.new_epochs
args.new_lr = more_args.new_lr

##################
## Specify data ##
##################

train_loader, eval_loader, data_shape = get_data(args)
data_id = get_data_id(args)

###################
## Specify model ##
###################

model = get_model(args, data_shape=data_shape)
model_id = get_model_id(args)

#######################
## Specify optimizer ##
#######################

optimizer, _, _ = get_optim(args, model)
optim_id = 'more'
    raise Exception("framework not supported!!!")

from data.data import get_data_npz, get_data
from model_search import run_model_search_cnn, run_model_search_mlp

if args.dataset[-4:] == '.npz':
    # Use .npz files
    dataset = args.dataset[:-4]
    data = get_data_npz(data_folder=args.data_folder,
                        dataset=args.dataset,
                        val_split=args.val_split,
                        problem_type=args.problem_type)
else:
    # Use framework built-in datasets
    dataset = args.dataset
    data = get_data(data_folder=args.data_folder,
                    dataset=args.dataset,
                    val_split=args.val_split,
                    augment=args.augment)

dataset_code = ('M' if dataset == 'mnist' else
                'F' if dataset == 'fmnist' else
                'R' if dataset == 'rcv1_2000' else 'XXX')

if args.network == 'cnn':
    run_model_search_cnn(data=data,
                         dataset_code=dataset_code,
                         input_size=args.input_size,
                         output_size=args.output_size,
                         problem_type=args.problem_type,
                         verbose=args.verbose,
                         wc=args.wc,
                         tbar_epoch=args.tbar_epoch,
                         numepochs=args.numepochs,
                         val_patience=args.val_patience,
                         bo_prior_states=args.bo_prior_states,
torch.manual_seed(eval_args.seed)

###############
## Load args ##
###############

with open(path_args, 'rb') as f:
    args = pickle.load(f)
args.batch_size = eval_args.samples

##################
## Specify data ##
##################

eval_loader, data_shape, cond_shape = get_data(args, eval_only=True)

###################
## Specify model ##
###################

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = get_model(args, data_shape=data_shape, cond_shape=cond_shape)
if args.parallel == 'dp':
    model = DataParallelDistribution(model)
checkpoint = torch.load(path_check, map_location=torch.device(device))
model.load_state_dict(checkpoint['model'])
model = model.to(device)
model = model.eval()
print('Loaded weights for model at {}/{} epochs'.format(
import sys
sys.path.append('/home/polichism/test/')

from data import data as _data

if __name__ == "__main__":
    # Parenthesized print works under both Python 2 and 3
    print(_data.get_data('http://www.google.com'))