def collect_command(
    user_handle: str,
    parent: str = ":root",
    data_types: List[str] = ["dataverses", "datasets", "datafiles"],
    filename: str = "tree.json",
    create_json: bool = True,
) -> None:
    collect_data(user_handle, parent, data_types, filename, create_json)
    typer.echo("Data collected")
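# Minimal invocation sketch: registering collect_command on a typer.Typer()
# app. The app object and CLI layout here are assumptions, not from the source.
import typer

app = typer.Typer()
app.command("collect")(collect_command)

if __name__ == "__main__":
    app()  # e.g. `python cli.py collect my_handle --parent :root`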
def train(self, pi):
    self._initialize_data_buffers()
    self._record_snapshot(pi, None, None, None)
    for t in tqdm(range(self.time_steps)):
        # collect new data
        xprop, yprop, sprop, d = utils.collect_data(
            self.td,
            self.n_samples,
            policy=self.deployed["pis"][t],
            fix_proposed=self.fix_prop,
            random_state=self.data_seeds[t],
        )
        x, y, s = xprop[d], yprop[d], sprop[d]
        accepted = len(y)
        if accepted < 1:
            # didn't get any data, continue
            logger.warning("0 accepted; continue")
        else:
            self._update_data_buffers(x, y, s, None, t, accepted)
            # update logistic model in pi
            try:
                pi.set_theta(np.array(utils.fit_logit(self.x_buf, self.y_buf)))
            except (PerfectSeparationError, LinAlgError) as err:
                logger.info(f"Error in LogReg: {err}")
        self._record_snapshot(pi, yprop, sprop, d)
    return self._merge_and_convert_results()
def train(self, pi):
    self._record_snapshot(pi, None, None, None)
    for t in tqdm(range(self.time_steps)):
        xprop, yprop, sprop, d = utils.collect_data(
            self.td,
            self.n_samples,
            policy=pi,
            fix_proposed=self.fix_prop,
            random_state=self.data_seeds[t],
        )
        _, y, _ = xprop[d], yprop[d], sprop[d]
        # the oracle
        if pi is None:
            d = y == 1
            y = y[d]
        accepted = len(y)
        if accepted < 1:
            logger.warning("0 accepted; continue")
        self._record_snapshot(pi, yprop, sprop, d)
    return self._merge_and_convert_results()
def train(self, pi):
    lr_timestep = self.lr_init
    self._initialize_data_buffers()
    self._record_snapshot(pi, None, None, None)
    for t in tqdm(range(self.time_steps)):
        lr_timestep = self._next_learning_rate_timestep(lr_timestep, t)
        # collect new data
        xprop, yprop, sprop, d = utils.collect_data(
            self.td,
            self.n_samples,
            policy=self.deployed["pis"][t],
            fix_proposed=self.fix_prop,
            random_state=self.data_seeds[t],
        )
        x, y, s = xprop[d], yprop[d], sprop[d]
        if self.data_type == "all":
            w = self._get_weights(x, self.deployed["pis"][t])
        else:
            w = None
        accepted = len(y)
        if accepted < 1:
            # didn't get any data, continue
            logger.warning("0 accepted; continue")
        else:
            self._update_data_buffers(x, y, s, w, t, accepted)
            # epochs
            for e in range(self.epochs):
                perm = np.random.permutation(accepted)
                xp, yp, sp = x[perm], y[perm], s[perm]
                if self.data_type == "all":
                    wp = self.w_buf[perm]
                # minibatches
                for i1 in range(0, accepted, self.batchsize):
                    i2 = min(i1 + self.batchsize, accepted)
                    xb, yb, sb = xp[i1:i2], yp[i1:i2], sp[i1:i2]
                    wb = None if self.data_type != "all" else wp[i1:i2]
                    # gradient step
                    grad = self._grad_utility(
                        (xb, yb, sb),
                        pi,
                        self.deployed["pis"][t],
                        weights=wb,
                    )
                    pi.theta += lr_timestep * grad
        self._record_snapshot(pi, yprop, sprop, d)
    return self._merge_and_convert_results()
def init_preproc_workflow(bids_dir, output_dir, work_dir,
                          subject_list, session_label, task_label, run_label):
    """
    A workflow for preprocessing complex-valued multi-echo fMRI data
    with single-band reference images and available T1s.
    """
    # setup workflow
    participant_wf = pe.Workflow(name='participant_wf')
    participant_wf.base_dir = os.path.join(work_dir, 'complex_preprocessing')
    os.makedirs(participant_wf.base_dir, exist_ok=True)

    # Read in dataset, but don't validate because phase isn't supported yet
    layout = BIDSLayout(bids_dir, validate=False)

    for subject_label in subject_list:
        # collect the necessary inputs
        subject_data = collect_data(layout, subject_label,
                                    task=task_label, run=run_label,
                                    ses=session_label)

        single_subject_wf = init_single_subject_wf(
            name='single_subject_' + subject_label + '_wf',
            output_dir=output_dir,
            layout=layout,
            bold_files=subject_data['bold_mag_files'],
            bold_metadata=subject_data['bold_mag_metadata'],
            phase_files=subject_data['bold_phase_files'],
            phase_metadata=subject_data['bold_phase_metadata'],
        )
        single_subject_wf.config['execution']['crashdump_dir'] = os.path.join(
            output_dir, 'sub-' + subject_label, 'log')

        for node in single_subject_wf._get_all_nodes():
            node.config = deepcopy(single_subject_wf.config)

        participant_wf.add_nodes([single_subject_wf])

    return participant_wf
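# Usage sketch: build and execute the workflow with nipype's MultiProc plugin.
# All paths and labels below are illustrative placeholders, not from the source.
wf = init_preproc_workflow(bids_dir='/data/bids', output_dir='/data/out',
                           work_dir='/tmp/work', subject_list=['01'],
                           session_label=None, task_label='rest',
                           run_label=None)
wf.run(plugin='MultiProc', plugin_args={'n_procs': 4})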
def main():
    torch.cuda.manual_seed(seed)
    cudnn.benchmark = CUDNN

    # model
    model = tiramisu.FcDnSubtle(in_channels=8, n_classes=N_CLASSES)
    model = model.cuda()
    print(' + Number of params: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))
    model.apply(utils.weights_init)
    optimizer = optim.RMSprop(model.parameters(), lr=LEARNING_RATE,
                              weight_decay=WEIGHT_DECAY, eps=1e-12)
    criterion = nn.NLLLoss2d().cuda()

    exp = experiment.Experiment(EXPNAME, EXPERIMENT)
    exp.init()

    START_EPOCH = exp.epoch
    END_EPOCH = START_EPOCH + N_EPOCHS

    # for epoch in range(1):
    for epoch in range(START_EPOCH, END_EPOCH):
        since = time.time()

        ### Collect data ###
        # delete existing folder and old data
        if os.path.exists(res_root_path):
            shutil.rmtree(res_root_path)
        utils.collect_data(ori_train_base_rp, res_train_base_rp)
        utils.collect_data(ori_val_base_rp, res_val_base_rp)
        # data loader
        train_loader, val_loader = utils.data_loader(res_root_path)

        ### Train ###
        trn_loss, trn_err = utils.train(model, train_loader, optimizer,
                                        criterion, epoch)
        print('Epoch {:d}: Train - Loss: {:.4f}\tErr: {:.4f}'.format(
            epoch, trn_loss, trn_err))
        time_elapsed = time.time() - since
        print('Train Time {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                  time_elapsed % 60))

        ### Val ###
        val_loss, val_err = utils.test(model, val_loader, criterion, epoch)
        print('Val - Loss: {:.4f}, Error: {:.4f}'.format(val_loss, val_err))
        time_elapsed = time.time() - since
        print('Total Time {:.0f}m {:.0f}s\n'.format(time_elapsed // 60,
                                                    time_elapsed % 60))

        ### Save Metrics ###
        exp.save_history('train', trn_loss, trn_err)
        exp.save_history('val', val_loss, val_err)

        ### Checkpoint ###
        exp.save_weights(model, trn_loss, val_loss, trn_err, val_err)
        exp.save_optimizer(optimizer, val_loss)

        ## Early Stopping ##
        if (epoch - exp.best_val_loss_epoch) > MAX_PATIENCE:
            print('Early stopping at epoch {:d}: no improvement over best '
                  'val loss {:.3f}'.format(epoch, exp.best_val_loss))
            break

        utils.adjust_learning_rate(LEARNING_RATE, LR_DECAY, optimizer,
                                   epoch, DECAY_LR_EVERY_N_EPOCHS)
        exp.epoch += 1
random.shuffle(train_set)
dev_set = train_set[:200]
train_set = train_set[200:]
print('Number of training samples: ' + str(len(train_set)))
print('Number of development samples: ' + str(len(dev_set)))
print("Number of nodes: " + str(len_node) +
      ", while max allowed is " + str(options.max_node_num))
print("Number of parent nodes: " + str(len_in_node) +
      ", truncated to " + str(options.max_in_node_num))
print("Number of child nodes: " + str(len_out_node) +
      ", truncated to " + str(options.max_out_node_num))
print("Entity size: " + str(entity_size) +
      ", truncated to " + str(options.max_entity_size))

# Build dictionary and mapping of words, characters, edges
words, chars, edges = collect_data(train_set)
print('Number of words: ' + str(len(words)))
print('Number of characters: ' + str(len(chars)))
print('Number of edges: ' + str(len(edges)))
dict_word, word_to_id, id_to_word = word_mapping(words)
dict_char, char_to_id, id_to_char = char_mapping(chars)
dict_edge, edge_to_id, id_to_edge = edge_mapping(edges)
options.word_to_id = word_to_id
options.char_to_id = char_to_id
options.edge_to_id = edge_to_id
if options.binary_classification:
    options.relation_num = 2
else:
def main():
    # load parameters for run...
    parameters = yaml.safe_load(open(PARAM_FILE_DIRECTORY))
    db_defs = parameters['database']
    solver_param = parameters['solver_param']
    model_param = parameters['model_param']
    model_specific_params = parameters[model_param['model_type']]

    print('Collecting data...')
    data, count, dictionary, reverse_dictionary = collect_data(
        vocabulary_size=model_param['vocabulary_size'])
    print('Done collecting data.')

    # skip_window = 2  # How many words to consider left and right.
    # num_skips = 2    # How many times to reuse an input to generate a label.

    # We pick a random validation set to sample nearest neighbors. Here we limit
    # the validation samples to the words that have a low numeric ID, which by
    # construction are also the most frequent.
    num_sampled = 64  # Number of negative examples to sample.

    '''
    print('trying to connect to db...')
    # create connection to the database.
    db = mysql.connect(host=db_defs['ip'], user=db_defs['usrid'],
                       passwd=db_defs['password'], db=db_defs['name'])
    print('connected to db...')
    '''
    '''
    # setup params for training
    ckpt_file = os.path.join(options.dir, options.model_name)
    log_dir = os.path.join(options.logdir, 'logs')
    print(ckpt_file)
    print(log_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    '''

    print('trying to import model')
    # build model...
    model = create_model(model_param, model_specific_params)
    print('Model built')

    # Initialize the solver object.
    solver = Solver(model)

    # train model...
    solver.train(data, dictionary, reverse_dictionary, solver_param)
    print('done!')
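# For reference, a sketch of the dict structure main() expects yaml to return;
# the key names come from the accesses above, but every value here is an
# illustrative assumption:
parameters = {
    'database': {'ip': '127.0.0.1', 'usrid': 'user',
                 'password': 'secret', 'name': 'corpus'},
    'solver_param': {'learning_rate': 0.01},
    'model_param': {'model_type': 'skipgram', 'vocabulary_size': 10000},
    'skipgram': {'embedding_dim': 300},  # keyed by model_param['model_type']
}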
import os
import sys

import numpy as np
from theano import tensor

from utils import collect_data, plot_img_dep, load_data

# set recursion limit so pickle doesn't error
sys.setrecursionlimit(40000)
random_state = np.random.RandomState(1999)
DEBUG = True

volume_path = '../data/train/'
model_path = 'models'
if not os.path.exists(model_path):
    os.mkdir(model_path)

n_epochs = 200
minibatchsize = 20

images, dmaps = collect_data(volume_path)
# only keep a minibatch-divisible number of samples
num_images = len(images) - (len(images) % minibatchsize)
images = images[:num_images]
dmaps = dmaps[:num_images]
print("Using %s images" % num_images)

X_train, y_train = load_data(images[0:minibatchsize], dmaps[0:minibatchsize])

# theano tensor4 for 4-dimensional (batch, channel, width, height) data
input_var = tensor.tensor4('X')
target_var = tensor.tensor4('y')
outchan = y_train.shape[1]
inchan = X_train.shape[1]
width = X_train.shape[2]
height = X_train.shape[3]
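# Sketch: stream the remaining minibatches through load_data, assuming it
# accepts parallel slices of image and depth-map paths exactly as called above:
for start in range(0, num_images, minibatchsize):
    stop = start + minibatchsize
    X_batch, y_batch = load_data(images[start:stop], dmaps[start:stop])
    # ... feed (X_batch, y_batch) to the compiled training function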
def main():
    cudnn.benchmark = True

    deeplab_caffe2pytorch = 'train_iter_20000.caffemodel.pth'
    print('load model:', deeplab_caffe2pytorch)
    pretrained_model = torch.load(deeplab_caffe2pytorch)
    model = UNet_deeplab(in_channels=4, feature_length=512)
    model = model.init_parameters(pretrained_model)

    # separate layers, to set different lr
    param_exist = []
    param_add = []
    for k, (name, module) in enumerate(model.named_children()):
        # existing layers: conv1~conv5, fc6, fc7
        if k < 7:
            for param in module.parameters():
                param_exist.append(param)
        # added layers: fc7_1
        else:
            for param in module.parameters():
                param_add.append(param)

    model = model.cuda()
    print(' + Number of params: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))
    optimizer = optim.RMSprop([{'params': param_exist, 'lr': LEARNING_RATE * 0.1},
                               {'params': param_add}],
                              lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY,
                              eps=1e-12)
    # use margin=2
    criterion = nn.TripletMarginLoss(margin=2, p=2).cuda()

    exp_dir = EXPERIMENT + 'ranking-test'
    if os.path.exists(exp_dir):
        shutil.rmtree(exp_dir)
    exp = experiment.Experiment('ranking-test', EXPERIMENT)
    exp.init()

    START_EPOCH = exp.epoch
    END_EPOCH = START_EPOCH + N_EPOCHS

    for epoch in range(START_EPOCH, END_EPOCH):
        since = time.time()

        ### Collect data ###
        # delete existing folder and old data
        if os.path.exists(res_root_path):
            shutil.rmtree(res_root_path)
        utils.collect_data(ori_train_base_rp, res_train_base_rp)
        utils.collect_data(ori_val_base_rp, res_val_base_rp)
        # data loader
        train_loader, val_loader = utils.data_loader(res_root_path)

        ### Train ###
        trn_loss = utils.train(model, train_loader, optimizer, criterion, epoch)
        trn_err = 0
        print('Epoch {:d}: Train - Loss: {:.4f}\tErr: {:.4f}'.format(
            epoch, trn_loss, trn_err))
        time_elapsed = time.time() - since
        print('Train Time {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                  time_elapsed % 60))

        ### Test ###
        val_loss = utils.test(model, val_loader, criterion, epoch)
        val_err = 0
        print('Val - Loss: {:.4f}, Error: {:.4f}'.format(val_loss, val_err))
        time_elapsed = time.time() - since
        print('Total Time {:.0f}m {:.0f}s\n'.format(time_elapsed // 60,
                                                    time_elapsed % 60))

        ### Save Metrics ###
        exp.save_history('train', trn_loss, trn_err)
        exp.save_history('val', val_loss, val_err)

        ### Checkpoint ###
        exp.save_weights(model, trn_loss, val_loss, trn_err, val_err)
        exp.save_optimizer(optimizer, val_loss)

        ## Early Stopping ##
        if (epoch - exp.best_val_loss_epoch) > MAX_PATIENCE:
            print('Early stopping at epoch {:d}: no improvement over best '
                  'val loss {:.3f}'.format(epoch, exp.best_val_loss))
            break

        # Adjust lr (old method)
        utils.adjust_learning_rate(LEARNING_RATE, LR_DECAY, optimizer,
                                   epoch, DECAY_LR_EVERY_N_EPOCHS)
        exp.epoch += 1
def main():
    torch.cuda.manual_seed(seed)
    cudnn.benchmark = CUDNN

    model = tiramisu.FCDenseNet57(n_classes=N_CLASSES)
    # model = model.cuda()
    model = torch.nn.DataParallel(model).cuda()
    print(' + Number of params: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))
    model.apply(utils.weights_init)
    optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE,
                          momentum=0.9, weight_decay=0.0005)
    criterion = nn.NLLLoss2d().cuda()

    exp_dir = EXPERIMENT + 'Objectness'
    if os.path.exists(exp_dir):
        shutil.rmtree(exp_dir)
    exp = experiment.Experiment('Objectness', EXPERIMENT)
    exp.init()

    START_EPOCH = exp.epoch
    END_EPOCH = START_EPOCH + N_EPOCHS

    for epoch in range(1, END_EPOCH):
        since = time.time()

        ### Collect data ###
        # delete existing folder and old data
        cont_rp = data_root_path + 'traincont/'
        if os.path.exists(cont_rp):
            shutil.rmtree(cont_rp)
        utils.collect_data(data_root_path, 'train')
        cont_rp = data_root_path + 'valcont/'
        if os.path.exists(cont_rp):
            shutil.rmtree(cont_rp)
        utils.collect_data(data_root_path, 'val')
        # data loader
        train_loader, val_loader = utils.data_loader(data_root_path)

        ### Train ###
        trn_loss, trn_err = utils.train(model, train_loader, optimizer,
                                        criterion, epoch)
        print('Epoch {:d}: Train - Loss: {:.4f}\tErr: {:.4f}'.format(
            epoch, trn_loss, trn_err))
        time_elapsed = time.time() - since
        print('Train Time {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                  time_elapsed % 60))

        ### Test ###
        val_loss, val_err = utils.test(model, val_loader, criterion, epoch)
        print('Val - Loss: {:.4f}, Error: {:.4f}'.format(val_loss, val_err))
        time_elapsed = time.time() - since
        print('Total Time {:.0f}m {:.0f}s\n'.format(time_elapsed // 60,
                                                    time_elapsed % 60))

        ### Save Metrics ###
        exp.save_history('train', trn_loss, trn_err)
        exp.save_history('val', val_loss, val_err)

        ### Checkpoint ###
        exp.save_weights(model, trn_loss, val_loss, trn_err, val_err)
        exp.save_optimizer(optimizer, val_loss)

        ## Early Stopping ##
        if (epoch - exp.best_val_loss_epoch) > MAX_PATIENCE:
            print('Early stopping at epoch {:d}: no improvement over best '
                  'val loss {:.3f}'.format(epoch, exp.best_val_loss))
            break

        # Adjust lr (old method)
        if epoch % 4 == 0:
            utils.adjust_learning_rate(LEARNING_RATE, LR_DECAY, optimizer,
                                       epoch, DECAY_LR_EVERY_N_EPOCHS)
        exp.epoch += 1
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence

from negative_sampling_model import NegativeSamplingWord2VecEmbedding

window_size = 3
vector_dim = 300
epochs = 5

valid_size = 16     # Random set of words to evaluate similarity on.
valid_window = 100  # Only pick dev samples in the head of the distribution.
valid_examples = np.random.choice(valid_window, valid_size, replace=False)

vocab_size = 10000
embedding_dim = 300

data, count, dictionary, reverse_dictionary = collect_data(
    vocabulary_size=vocab_size)

sampling_table = sequence.make_sampling_table(vocab_size)
couples, labels = sequence.skipgrams(data, vocab_size,
                                     window_size=window_size,
                                     sampling_table=sampling_table)
# word_target, word_context = zip(*couples)
# word_target = np.array(word_target, dtype="int32")
# word_context = np.array(word_context, dtype="int32")
print(couples[:10], labels[:10])

train_ds = tf.data.Dataset.from_tensor_slices(
    (couples, labels)).shuffle(10000).batch(32)
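# Quick sanity check of one batch from train_ds (a sketch; the exact shapes
# depend on how skipgrams packed `couples`, so the sizes in the comment are
# assumptions):
for couple_batch, label_batch in train_ds.take(1):
    print(couple_batch.shape, label_batch.shape)  # roughly (32, 2) and (32,)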
def get_data():
    # Runs the function only the first time it is seen
    utils.collect_data()
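# The comment above implies run-once behavior that the body alone does not
# enforce; one minimal way to get it (a sketch, assuming utils.collect_data
# is safe to cache, with a hypothetical wrapper name) is functools.lru_cache:
from functools import lru_cache

@lru_cache(maxsize=None)
def get_data_cached():
    # collect_data() executes only on the first call; later calls hit the cache
    return utils.collect_data()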
def main():
    cudnn.benchmark = True

    densenet201 = torchvision.models.densenet201(pretrained=True)
    dict_densenet201 = densenet201.state_dict()
    model = DenseNet.densenet201(vector_len=512)
    # initialize from the pretrained weights that match by name
    DenseNet_dict = model.state_dict()
    pretrained_dict = {
        k: v for k, v in dict_densenet201.items() if k in DenseNet_dict
    }
    # for k in pretrained_dict:
    #     print(k)
    DenseNet_dict.update(pretrained_dict)
    model.load_state_dict(DenseNet_dict)

    # separate layers, to set different lr
    param_exist = []
    param_add = []
    for k, (name, module) in enumerate(model.named_children()):
        # existing layers: self.features
        if k == 1:
            for param in module.parameters():
                param_exist.append(param)
        # added layers: self.classifier
        else:
            for param in module.parameters():
                param_add.append(param)

    model = model.cuda()
    # model = torch.nn.DataParallel(model).cuda()
    print(' + Number of params: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))
    optimizer = optim.SGD([{'params': param_exist, 'lr': LEARNING_RATE * 0.1},
                           {'params': param_add}],
                          lr=LEARNING_RATE, momentum=0.9, weight_decay=0.0005)
    # use margin=2
    criterion = nn.TripletMarginLoss(margin=2, p=2).cuda()

    exp_dir = EXPERIMENT + 'rankingVGG'
    if os.path.exists(exp_dir):
        shutil.rmtree(exp_dir)
    exp = experiment.Experiment('rankingVGG', EXPERIMENT)
    exp.init()

    START_EPOCH = exp.epoch
    END_EPOCH = START_EPOCH + N_EPOCHS

    for epoch in range(1, END_EPOCH):
        since = time.time()

        ### Collect data ###
        # delete existing folder and old data
        if os.path.exists(res_root_path):
            shutil.rmtree(res_root_path)
        utils.collect_data(ori_train_base_rp, res_train_base_rp)
        utils.collect_data(ori_val_base_rp, res_val_base_rp)
        # data loader
        train_loader, val_loader = utils.data_loader(res_root_path)

        ### Train ###
        trn_loss = utils.train(model, train_loader, optimizer, criterion, epoch)
        trn_err = 0
        print('Epoch {:d}: Train - Loss: {:.4f}\tErr: {:.4f}'.format(
            epoch, trn_loss, trn_err))
        time_elapsed = time.time() - since
        print('Train Time {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                  time_elapsed % 60))

        ### Test ###
        val_loss = utils.test(model, val_loader, criterion, epoch)
        val_err = 0
        print('Val - Loss: {:.4f}, Error: {:.4f}'.format(val_loss, val_err))
        time_elapsed = time.time() - since
        print('Total Time {:.0f}m {:.0f}s\n'.format(time_elapsed // 60,
                                                    time_elapsed % 60))

        ### Save Metrics ###
        exp.save_history('train', trn_loss, trn_err)
        exp.save_history('val', val_loss, val_err)

        ### Checkpoint ###
        exp.save_weights(model, trn_loss, val_loss, trn_err, val_err)
        exp.save_optimizer(optimizer, val_loss)

        ## Early Stopping ##
        if (epoch - exp.best_val_loss_epoch) > MAX_PATIENCE:
            print('Early stopping at epoch {:d}: no improvement over best '
                  'val loss {:.3f}'.format(epoch, exp.best_val_loss))
            break

        # Adjust lr
        if epoch % 4 == 0:
            utils.adjust_learning_rate(LEARNING_RATE, LR_DECAY, optimizer,
                                       epoch, DECAY_LR_EVERY_N_EPOCHS)
        exp.epoch += 1