def multigrid_step(self, x, bc, f, step):
    '''
    One layer of multigrid. Recursive function.
    Find solution x to Ax + b = 0.
    '''
    batch_size, image_size, _ = x.size()

    # Pre-smoothing
    for i in range(self.pre_smoothing):
        x = utils.fd_step(x, bc, f)

    if step > 1:
        # Downsample
        if f is not None:
            f_sub = 4 * utils.subsample(f)
        else:
            f_sub = None

        if self.is_bc_mask:
            # Subsample geometry
            bc_sub = utils.subsample(
                bc.view(batch_size * 2, image_size, image_size))
            bc_sub = bc_sub.view(batch_size, 2, *bc_sub.size()[-2:])
        else:
            bc_sub = bc

        x_sub = utils.restriction(x, bc_sub)

        # Refine x_sub recursively
        x_sub = self.multigrid_step(x_sub, bc_sub, f_sub, step - 1)

        # Upsample
        x = utils.interpolation(x_sub, bc)

    # Post-smoothing
    for i in range(self.post_smoothing):
        x = utils.fd_step(x, bc, f)

    return x
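# utils.subsample is assumed, not shown here. A minimal sketch of what it
# could look like for this multigrid code: stride-2 decimation of an
# odd-sized grid so coarse nodes coincide with fine ones (the factor 4
# applied to f above would then account for the doubled grid spacing h in
# an unscaled 5-point Laplacian stencil, since Ax = h^2 f). This is an
# illustrative assumption, not the project's actual implementation.
def subsample_sketch(x):
    # x: (batch, H, W) with H, W odd; keep every other node.
    return x[:, ::2, ::2]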
def initialize(self, data, n_samples=30000):
    """
    Initializes the EntropyBoW object by selecting the initial centers
    for the dictionary and the entropy centers
    """
    data = subsample(data, self.n_feature_samples)
    self.initialize_dictionary(data, n_samples=n_samples)
    self.entropy.init_centers(self.encode_objects_theano(data))
def __init__(self, corpus, pl_dimension, hl_dimension, window_size,
             learning_rate, training_method='cbow'):
    """
    Initializing Word2vec class.

    :param corpus: corpus to train
    :param pl_dimension: projection layer dimension
    :param hl_dimension: hidden layer dimension
    :param window_size: window size
    :param learning_rate: learning rate
    :param training_method: training method (skip-gram or cbow), default: cbow
    """
    self.corpus = corpus
    self.pl_dimension = pl_dimension
    self.hl_dimension = hl_dimension
    self.window_size = window_size
    self.learning_rate = learning_rate
    self.loss = 0.0

    splitted_corpus = utils.split(corpus)
    subsampled_splitted_corpus = utils.subsample(splitted_corpus)
    # Use the configured window size instead of a hard-coded value.
    self.corpus_window_array = utils.corpus2window(
        subsampled_splitted_corpus, window_size)

    np.random.seed(100)
    self.weight1 = np.random.rand(len(self.corpus_window_array), pl_dimension)
    self.weight2 = np.random.rand(pl_dimension, hl_dimension)
    self.weight3 = np.random.rand(hl_dimension, len(splitted_corpus))
    self.training_method = training_method
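# utils.subsample is assumed here. A minimal sketch of word2vec-style
# frequent-word subsampling (Mikolov et al., 2013): each occurrence of a
# word w is kept with probability sqrt(t / f(w)) for relative frequency
# f(w) and threshold t. This is an illustrative assumption, not this
# project's utils module.
import collections
import random

def subsample_words(tokens, t=1e-3):
    counts = collections.Counter(tokens)
    total = float(len(tokens))
    keep_prob = {w: min(1.0, (t / (c / total)) ** 0.5)
                 for w, c in counts.items()}
    return [w for w in tokens if random.random() < keep_prob[w]]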
def fit(self, data, iters=100):
    """ Train the Soft BoW layer using the entropy objective """
    for it in tqdm(range(iters)):
        subsampled_data = subsample(data, self.n_feature_samples)
        cur_loss = self.train_theano(subsampled_data)[0]
        print("Loss at iteration", it, "=", cur_loss)
def main():
    net = SkipGramNetwork(hp.VOCAB_SIZE, hp.EMBED_SIZE).to(device)
    print(net)

    if args.restore:
        net.load_state_dict(torch.load(args.restore))
        vocab, inverse_vocab = utils.load_data(args.restore)
        print("Model restored from disk.")
    else:
        sentences = utils.load_corpus(args.corpus)
        word_freqs = utils.word_counts(sentences)
        sentences, word_freqs = utils.trunc_vocab(sentences, word_freqs)  # TODO
        sentences = utils.subsample(sentences, word_freqs)
        vocab, inverse_vocab = utils.construct_vocab(sentences)  # TODO
        skipgrams = skip_grams(sentences, vocab)  # TODO
        utils.save_data(args.save, vocab, inverse_vocab)

        loader = DataLoader(skipgrams, batch_size=hp.BATCH_SIZE, shuffle=True)
        loss_hist = train(net, loader)  # TODO returns loss evaluations as python list

        # You can plot loss_hist for your writeup:
        plt.plot(loss_hist)
        plt.show()

    # the weights of the embedding matrix are the lookup table
    lookup_table = net.embeddings.weight.data.cpu().numpy()

    # TODO: Implement what you need in order to answer the writeup questions.
    nearest = most_similar(lookup_table, lookup_table[vocab['journeyed']])
    nearest_words = [inverse_vocab[w] for w in nearest if w in inverse_vocab]
    print('Nearest to {0}: {1}'.format('journeyed', nearest_words))

    # print('Dimension Reduction and Plotting')
    # reduced = TSNE().fit_transform(lookup_table)
    # plt.scatter(reduced[:,0], reduced[:,1])
    # plt.show()
    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000,
                method='exact')
    plot_only = 500
    low_dim_embs = tsne.fit_transform(lookup_table[:plot_only, :])
    labels = [inverse_vocab[i] for i in range(plot_only)]
    plot_with_labels(low_dim_embs, labels, 'tsne.png')
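# most_similar is referenced above but not defined in this snippet. A
# minimal sketch under the assumption that it returns indices of the rows
# of the lookup table closest in cosine similarity to a query vector (the
# query itself will be the first hit when it is a row of the table):
import numpy as np

def most_similar(lookup_table, query, k=8):
    norms = np.linalg.norm(lookup_table, axis=1) * np.linalg.norm(query)
    sims = lookup_table @ query / np.maximum(norms, 1e-8)
    return np.argsort(-sims)[:k]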
def H(self, r, bc):
    '''
    Return H(r).
    '''
    # Get masks first
    if self.is_bc_mask:
        bc_mask = bc[:, 1:, :, :]
        masks = [1 - bc_mask[:, :, 1:-1, 1:-1]]
        for i in range(self.n_layers - 1):
            bc_mask = utils.subsample(bc_mask.squeeze(1)).unsqueeze(1)
            masks.append(1 - bc_mask[:, :, 1:-1, 1:-1])
        # Multiply by mask
        r = r * masks[0]

    intermediates = []  # used for skip connections

    # First half
    for i in range(self.n_layers):
        for j in range(self.pre_smoothing):
            idx = i * self.pre_smoothing + j
            r = self.first_layers[idx](r)
            if self.is_bc_mask:
                r = r * masks[i]
        # Add to intermediates
        intermediates.append(r)
        # Subsample
        if i < self.n_layers - 1:
            r = self.pooling_layers[i](r)
            if self.is_bc_mask:
                r = r * masks[i + 1]

    # Second half
    for i in range(self.n_layers):
        for j in range(self.post_smoothing):
            idx = i * self.post_smoothing + j
            r = self.second_layers[idx](r)
            if self.is_bc_mask:
                r = r * masks[self.n_layers - i - 1]
        # Add skip connections
        r = r + intermediates[self.n_layers - i - 1]
        # Upsample
        if i < self.n_layers - 1:
            r = F.pad(r, (1, 1, 1, 1))
            new_size = r.size(-1) * 2 - 1
            r = F.interpolate(r, size=new_size, mode='bilinear',
                              align_corners=True)
            r = r[:, :, 1:-1, 1:-1]
            if self.is_bc_mask:
                r = r * masks[self.n_layers - i - 2]

    return r
def iter_pcds(file_names, subsample_size, max_scenes):
    """
    Return an iterable of all pcd files, from which we take a sample.
    """
    for file_id, file_name in enumerate(file_names):
        # max number of scenes reached
        if file_id == max_scenes:
            break

        all_points = utils.readpcd(file_name)
        sample = (utils.subsample(all_points, subsample_size)
                  if subsample_size < 1 else all_points)
        yield file_id, sample, all_points
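# utils.subsample is assumed to draw a random fraction of the points when
# subsample_size is given as a ratio in (0, 1), matching the guard above.
# A minimal numpy sketch of that assumption:
import numpy as np

def subsample_points(points, ratio):
    # points: (N, 3) array; keep a random subset of size N * ratio.
    n = max(1, int(len(points) * ratio))
    idx = np.random.choice(len(points), size=n, replace=False)
    return points[idx]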
def test_subsampling(geometry, image_size):
    print('########### Test subsampling ##########\n')
    x, bc_values, bc_mask = utils.get_geometry(geometry, image_size, 1, 1)
    bc_values = torch.Tensor(bc_values)
    bc_mask = torch.Tensor(bc_mask)
    plot(bc_values.squeeze(0).numpy())
    plot(bc_mask.squeeze(0).numpy())

    n_layers = 3
    for i in range(n_layers):
        bc_values = utils.subsample(bc_values)
        bc_mask = utils.subsample(bc_mask)

        mask = bc_mask.squeeze(0).numpy()
        assert np.all(np.logical_or(np.isclose(mask, 0), np.isclose(mask, 1)))

        bc_values = bc_values * bc_mask
        values = bc_values.squeeze(0).numpy()
        assert np.all(values < 1.00001)

        plot(values)
        plot(mask)
def test_upsampling_poisson(x, gt, bc, f):
    print('Upsampling multigrid')
    f_sub = utils.subsample(f)
    x_sub = utils.restriction(x, bc)
    for i in range(1000):
        x_sub = utils.fd_step(x_sub, bc, f_sub)

    # Upsample
    x = utils.interpolation(x_sub, bc)

    A = utils.loss_kernel.view(1, 1, 3, 3)
    r = F.conv2d(x.unsqueeze(1), A).squeeze(1)
    r = utils.pad_boundary(r, torch.zeros(1, 4)) - f
    r = r.cpu().numpy()
    print(r.max())
def test_subsampling_poisson(x, gt, bc, f):
    print('Subsampling multigrid')
    for i in range(2000):
        x = utils.fd_step(x, bc, f)

    A = utils.loss_kernel.view(1, 1, 3, 3)
    r = F.conv2d(x.unsqueeze(1), A).squeeze(1)
    r = utils.pad_boundary(r, torch.zeros(1, 4)) - f
    print(np.abs(r.cpu().numpy()).max())

    # Subsample progressively: each iteration coarsens the previous level
    # (the original always re-coarsened x and f, so all three levels were
    # identical).
    x_sub = x
    f_sub = f
    for i in range(3):
        f_sub = 4 * utils.subsample(f_sub)
        x_sub = utils.restriction(x_sub, bc)
        r_sub = F.conv2d(x_sub.unsqueeze(1), A).squeeze(1)
        r_sub = utils.pad_boundary(r_sub, torch.zeros(1, 4)) - f_sub
        print(x_sub.size())
        print(np.abs(r_sub.cpu().numpy()).max())
def set_field_of_view(self, subsampling=(1, 1, 1), corner=(0, 0, 0),
                      size=None, fixed_npoints=None):
    self.block_corner = np.array(corner, dtype='uint')
    if size is None:
        size = self.source.shape
    self.block_size = np.array(size, dtype='uint')
    aux = self.source_clamped[
        corner[0]:corner[0] + size[0] - 1:subsampling[0],
        corner[1]:corner[1] + size[1] - 1:subsampling[1],
        corner[2]:corner[2] + size[2] - 1:subsampling[2]]
    if fixed_npoints is None:
        self.block_subsampling = np.array(subsampling, dtype='uint')
        self.source_block = aux
        self.block_npoints = (self.source_block >= 0).sum()
    else:
        fixed_npoints = int(fixed_npoints)
        self.source_block, self.block_subsampling, self.block_npoints = \
            subsample(aux, npoints=fixed_npoints)
    # Taux: block to full array transformation
    Taux = np.diag(np.concatenate((self.block_subsampling, [1])))
    Taux[0:3, 3] = self.block_corner
    self.block_transform = np.dot(self.source_toworld, Taux)
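# The subsample helper above is assumed to coarsen the block until the
# number of usable voxels (here, nonnegative ones) drops to about npoints,
# returning the coarsened array, the per-axis subsampling factors, and the
# resulting voxel count. A minimal sketch of that assumption:
import numpy as np

def subsample(data, npoints):
    factors = np.ones(3, dtype='uint')
    sub = data
    axis = 0
    while (sub >= 0).sum() > npoints:
        # Double the decimation factor on one axis at a time.
        factors[axis] *= 2
        sub = data[::factors[0], ::factors[1], ::factors[2]]
        axis = (axis + 1) % 3
    return sub, factors, (sub >= 0).sum()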
        optimizer = optim.Adam(model.parameters(), lr=0.01)
    else:
        model = MLP(x.size()[-1], out_dim=nb_classes).to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001)
    loss_func = nn.CrossEntropyLoss()
    train_mlp(model, optimizer, loss_func, x, labels, train_mask_real,
              val_mask, test_mask, batch_size=128, epochs=200,
              iters_per_epoch=int(nb_train_real / 128) + 1, patience=10)
elif args.model == 'ATTR_RW_MF':
    ## ``Ours 1`` with (args.times_features == False)
    ## ``Ours 2`` with (args.times_features == True)
    if not args.saved:
        # For the ablation study of local vs. non-local
        if args.subsample:
            features = subsample(adj, features)
        trans_attr_rw, trans_deg_inv, vol = normalize_trans(
            concat_attr(adj, features))
        rank_k = 300
        window_size = 5
        '''
        vals, vecs = LA.eigsh(trans_attr_rw, k=rank_k)
        vals_power = [vals]
        for i in range(window_size):
            vals_power.append(vals_power[-1] * vals)
        vals_power = sum(vals_power) / window_size
        trans_power = vecs @ np.diag(vals_power) @ vecs.transpose()
        '''
reload(utils)
reload(algo_param)
reload(param)

# TODO Add unlabeled subset functionality
# TODO Add parallelization

##################### PERFORM GRID SEARCH ########################
if param.optimize_params:
    # parse data
    all_X, all_Y = utils.parse(param.data_file, param.feature_file,
                               param.response_var,
                               debug_limit=param.debug_limit)
    X, Y = utils.labeled_subset(all_X, all_Y)
    X, Y = utils.subsample((X, Y), param.labeled_subsample)
    (X_train, X_test, Y_train, Y_test) = utils.train_test_split(
        X, Y, test_size=param.test_size)

    # pickle data for use in other files
    saved_data = (X_train, X_test, Y_train, Y_test)
    utils.pickler(saved_data, param.optimization_data_pickle)

    # make meta pipeline for grid searching
    pipeline, parameter_space = make_meta_pipeline([
        ('imputer', param.imputer_params),
        ('scaler', param.scaler_params),
        ('dim_reducer', param.dim_reducer_params),
        ('regressor', param.regressor_params)
    ], all_X, all_Y)

    print("Opening logfiles")
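# utils.subsample is assumed to take an (X, Y) pair and return a random
# subset of at most n rows, keeping features and labels aligned. A minimal
# sketch of that assumption:
import numpy as np

def subsample(data, n):
    X, Y = data
    if n >= len(X):
        return X, Y
    idx = np.random.choice(len(X), size=n, replace=False)
    return X[idx], Y[idx]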
def train(config):
    print("Deep copy of model with margin as 1.0")
    ## set pre-process
    prep_dict = {}
    prep_config = config["prep"]
    prep_dict["source"] = prep.image_train(**config["prep"]['params'])
    prep_dict["target"] = prep.image_train(**config["prep"]['params'])
    if prep_config["test_10crop"]:
        prep_dict["test"] = prep.image_test_10crop(**config["prep"]['params'])
    else:
        prep_dict["test"] = prep.image_test(**config["prep"]['params'])

    ## prepare data
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    train_bs = data_config["source"]["batch_size"]
    test_bs = data_config["test"]["batch_size"]
    dsets["source"] = ImageList(
        open(data_config["source"]["list_path"]).readlines(),
        transform=prep_dict["source"])
    dset_loaders["source"] = DataLoader(dsets["source"], batch_size=train_bs,
                                        shuffle=True, num_workers=4,
                                        drop_last=True)
    dsets["target"] = ImageList(
        open(data_config["target"]["list_path"]).readlines(),
        transform=prep_dict["target"])
    dset_loaders["target"] = DataLoader(dsets["target"], batch_size=train_bs,
                                        shuffle=True, num_workers=4,
                                        drop_last=True)

    if prep_config["test_10crop"]:
        # (the original wrapped this in a redundant `for i in range(10)` loop
        # that rebuilt the same list 10 times)
        dsets["test"] = [ImageList(
            open(data_config["test"]["list_path"]).readlines(),
            transform=prep_dict["test"][i]) for i in range(10)]
        dset_loaders["test"] = [DataLoader(dset, batch_size=test_bs,
                                           shuffle=False, num_workers=4)
                                for dset in dsets['test']]
    else:
        dsets["test"] = ImageList(
            open(data_config["test"]["list_path"]).readlines(),
            transform=prep_dict["test"])
        dset_loaders["test"] = DataLoader(dsets["test"], batch_size=test_bs,
                                          shuffle=False, num_workers=4)

    class_num = config["network"]["params"]["class_num"]

    ## set base network
    net_config = config["network"]
    base_network = net_config["call"](net_config["name"],
                                      **net_config["params"])
    base_network_teacher = net_config["call"](net_config["name"],
                                              **net_config["params_teacher"])
    base_network = base_network.cuda()
    base_network_teacher = copy.deepcopy(base_network).cuda()
    for param in base_network_teacher.parameters():
        param.detach_()
    # base_network_teacher = base_network_teacher.cuda()
    # print("check init: ", torch.equal(base_network.fc.weight,
    #                                   base_network_teacher.fc.weight))

    base_network.layer1[-1].relu = nn.ReLU()
    base_network.layer2[-1].relu = nn.ReLU()
    base_network.layer3[-1].relu = nn.ReLU()
    base_network.layer4[-1].relu = nn.ReLU()
    base_network_teacher.layer1[-1].relu = nn.ReLU()
    base_network_teacher.layer2[-1].relu = nn.ReLU()
    base_network_teacher.layer3[-1].relu = nn.ReLU()
    base_network_teacher.layer4[-1].relu = nn.ReLU()
    # print(base_network)

    # Note: the original condition `n == 'layer1.2.bn3' or 'layer2.3.bn3'
    # or ...` was always true (non-empty strings are truthy); test
    # membership instead.
    for n, m in base_network.named_modules():
        if n in ('layer1.2.bn3', 'layer2.3.bn3', 'layer3.5.bn3',
                 'layer4.2.bn3'):
            m.register_forward_hook(get_activation_student(n))

    if config["loss"]["random"]:
        random_layer = network.RandomLayer(
            [base_network.output_num(), class_num],
            config["loss"]["random_dim"])
        ad_net = network.AdversarialNetwork(config["loss"]["random_dim"], 1024)
    else:
        random_layer = None
        ad_net = network.AdversarialNetwork(
            base_network.output_num() * class_num, 1024)
    if config["loss"]["random"]:
        random_layer.cuda()
    ad_net = ad_net.cuda()

    parameter_list = base_network.get_parameters() + ad_net.get_parameters()
    Hloss = loss.Entropy()

    ## set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optimizer_config["type"](parameter_list,
                                         **(optimizer_config["optim_params"]))
    param_lr = []
    for param_group in optimizer.param_groups:
        param_lr.append(param_group["lr"])
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]
    gpus = config['gpu'].split(',')
    if len(gpus) > 1:
        ad_net = nn.DataParallel(ad_net, device_ids=[int(i) for i in gpus])
        base_network = nn.DataParallel(base_network,
                                       device_ids=[int(i) for i in gpus])

    ## train
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    transfer_loss_value = classifier_loss_value = total_loss_value = 0.0
    best_acc = 0.0
    temperature = config["temperature"]

    for i in trange(config["num_iterations"], leave=False):
        global activation_student
        if i % config["test_interval"] == config["test_interval"] - 1:
            base_network.eval()
            base_network_teacher.eval()
            temp_acc, temp_acc_teacher = image_classification_test(
                dset_loaders, base_network, base_network_teacher,
                test_10crop=prep_config["test_10crop"])
            temp_model = nn.Sequential(base_network_teacher)
            if temp_acc > best_acc:
                best_acc = temp_acc
                best_model = temp_model
            log_str = "iter: {:05d}, precision: {:.5f}".format(i, temp_acc)
            log_str1 = "precision: {:.5f}".format(temp_acc_teacher)
            config["out_file"].write(log_str + "\t" + log_str1 + "\t" +
                                     str(classifier_loss.item()) + "\t" +
                                     str(dann_loss.item()) + "\t" +
                                     str(ent_loss.item()) + "\t" + "\n")
            config["out_file"].flush()
            print("ent Loss: ", ent_loss.item())
            print("Dann loss: ", dann_loss.item())
            print("Classification Loss: ", classifier_loss.item())
            print(log_str)
            print(log_str1)
        # if i % config["snapshot_interval"] == 0:
        #     torch.save(nn.Sequential(base_network),
        #                osp.join(config["output_path"],
        #                         "iter_{:05d}_model.pth.tar".format(i)))

        loss_params = config["loss"]
        ## train one iter
        base_network.train(True)
        base_network_teacher.train(True)
        ad_net.train(True)
        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        optimizer.zero_grad()

        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])
        inputs_source, labels_source = next(iter_source)
        inputs_target, labels_target = next(iter_target)
        inputs_source1, inputs_source2, inputs_target1, inputs_target2, \
            labels_source = utils.get_copies(inputs_source, inputs_target,
                                             labels_source)

        margin = 1
        loss_alter = 0

        #### For source data
        features_source, outputs_source = base_network(inputs_source1)
        # features_source2, outputs_source2 = base_network(inputs_source2)
        feature1 = base_network_teacher.features1(inputs_source2)
        feature2 = base_network_teacher.features2(feature1)
        feature3 = base_network_teacher.features3(feature2)
        feature4 = base_network_teacher.features4(feature3)
        feature4_avg = base_network_teacher.avgpool(feature4)
        feature4_res = feature4_avg.view(feature4_avg.size(0), -1)
        features_source2 = base_network_teacher.bottleneck(feature4_res)
        outputs_source2 = base_network_teacher.fc(features_source2)

        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer1.2.bn3'], feature1.detach(), margin) / (
                train_bs * activation_student['layer1.2.bn3'].size(1) * 8)
        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer2.3.bn3'], feature2.detach(), margin) / (
                train_bs * activation_student['layer2.3.bn3'].size(1) * 4)
        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer3.5.bn3'], feature3.detach(), margin) / (
                train_bs * activation_student['layer3.5.bn3'].size(1) * 2)
        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer4.2.bn3'], feature4.detach(), margin) / (
                train_bs * activation_student['layer4.2.bn3'].size(1))

        ## For target data
        ramp = utils.sigmoid_rampup(i, 100004)
        ramp_confidence = utils.sigmoid_rampup(5 * i, 100004)
        features_target, outputs_target = base_network(inputs_target1)
        sample_selection_indices = get_confident_idx.confident_samples(
            base_network, inputs_target1, ramp_confidence, class_num, train_bs)
        confident_targets = utils.subsample(outputs_target,
                                            sample_selection_indices)

        feature1_teacher = base_network_teacher.features1(inputs_target2)
        feature2_teacher = base_network_teacher.features2(feature1_teacher)
        feature3_teacher = base_network_teacher.features3(feature2_teacher)
        feature4_teacher = base_network_teacher.features4(feature3_teacher)
        feature4_teacher_avg = base_network_teacher.avgpool(feature4_teacher)
        feature4_teacher_res = feature4_teacher_avg.view(
            feature4_teacher_avg.size(0), -1)
        features_target2 = base_network_teacher.bottleneck(
            feature4_teacher_res)
        outputs_target2 = base_network_teacher.fc(features_target2)

        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer1.2.bn3'], feature1_teacher.detach(),
            margin) / (train_bs *
                       activation_student['layer1.2.bn3'].size(1) * 8)
        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer2.3.bn3'], feature2_teacher.detach(),
            margin) / (train_bs *
                       activation_student['layer2.3.bn3'].size(1) * 4)
        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer3.5.bn3'], feature3_teacher.detach(),
            margin) / (train_bs *
                       activation_student['layer3.5.bn3'].size(1) * 2)
        loss_alter += loss.decision_boundary_transfer(
            activation_student['layer4.2.bn3'], feature4_teacher.detach(),
            margin) / (train_bs * activation_student['layer4.2.bn3'].size(1))

        loss_alter = loss_alter / 1000  ## maybe multiply by 4 later in tests
        loss_alter = loss_alter.unsqueeze(0).unsqueeze(1)

        features = torch.cat((features_source, features_target), dim=0)
        outputs = torch.cat((outputs_source, outputs_target), dim=0)
        softmax_out_src = nn.Softmax(dim=1)(outputs_source)
        softmax_out_tar = nn.Softmax(dim=1)(outputs_target)
        softmax_out = nn.Softmax(dim=1)(outputs)

        features_teacher = torch.cat((features_source2, features_target2),
                                     dim=0)
        outputs_teacher = torch.cat((outputs_source2, outputs_target2), dim=0)
        softmax_out_src_teacher = nn.Softmax(dim=1)(outputs_source2)
        softmax_out_tar_teacher = nn.Softmax(dim=1)(outputs_target2)
        softmax_out_teacher = nn.Softmax(dim=1)(outputs_teacher)

        if config['method'] == 'DANN+E':
            ent_loss = Hloss(confident_targets)
            dann_loss = loss.DANN(features, ad_net)
        elif config['method'] == 'DANN':
            # note: ent_loss is only defined for method 'DANN+E'
            dann_loss = loss.DANN(features, ad_net)
            # dann_loss = 0
        else:
            raise ValueError('Method cannot be recognized.')
        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)

        # loss_KD = -(F.softmax(outputs_teacher / temperature, 1).detach() *
        #             (F.log_softmax(outputs / temperature, 1) -
        #              F.log_softmax(outputs_teacher / temperature,
        #                            1).detach())).sum() / train_bs
        # print(loss_KD)
        # total_loss = loss_alter  # + (config["ent_loss"] * ent_loss)
        total_loss = dann_loss + classifier_loss + (ramp * ent_loss)
        total_loss.backward(retain_graph=True)
        optimizer.step()
        loss.update_ema_variables(base_network, base_network_teacher,
                                  config["teacher_alpha"], i)

    torch.save(best_model,
               osp.join(config["output_path"], "best_model.pth.tar"))
    return best_acc
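# loss.update_ema_variables is assumed to be the usual mean-teacher update:
# the teacher's weights track an exponential moving average of the
# student's. A minimal sketch of that assumption (not necessarily this
# repo's exact version):
def update_ema_variables_sketch(student, teacher, alpha, global_step):
    # Ramp up the decay so early steps copy the student more aggressively.
    alpha = min(1 - 1 / (global_step + 1), alpha)
    for ema_p, p in zip(teacher.parameters(), student.parameters()):
        ema_p.data.mul_(alpha).add_(p.data, alpha=1 - alpha)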
def load_split(self, features, split, feature_type="X", sample_rate=0.5): # Setup directory and filenames dir_features = self.feature_path(features) # Get splits for this partion of data if self.activity == None: file_train = open( self.base_dir + "Splits/{}/{}/train.txt".format(self.name, split)).readlines() file_test = open( self.base_dir + "Splits/{}/{}/test.txt".format(self.name, split)).readlines() else: file_train = open( self.base_dir + "Splits/{}/{}/{}/train.txt".format( self.name, self.activity, split)).readlines() file_test = open(self.base_dir + "Splits/{}/{}/{}/test.txt".format( self.name, self.activity, split)).readlines() file_train = [f.strip() for f in file_train] file_test = [f.strip() for f in file_test] # Remove extension if "." in file_train[0]: file_train = [".".join(f.split(".")[:-1]) for f in file_train] file_test = [".".join(f.split(".")[:-1]) for f in file_test] self.trials_train = file_train self.trials_test = file_test # Get all features files_features = self.get_files(dir_features, split) X_all, Y_all = [], [] dir_labels = '../UW_IOM_Dataset/VideoLabelsNum/' for f in files_features: if "Split_" in os.listdir(dir_features)[-1]: # data_tmp = sio.loadmat( closest_file("{}{}/{}".format(dir_features,split, f)) ) # print(closest_file("{}{}/{}".format(dir_labels,split, f))) data_tmp_Y = np.load( closest_file("{}{}/{}".format(dir_labels, split, f))) data_tmp_X = np.load( closest_file("{}{}/{}".format( dir_features, split, f)))[0:np.shape(data_tmp_Y)[0], :] #print('here') #print('data_tmp_X: ',np.shape(data_tmp_X)) #print('data_tmp_Y: ',np.shape(data_tmp_Y)) else: # data_tmp = sio.loadmat( closest_file("{}/{}".format(dir_features, f)) ) data_tmp_Y = np.load( closest_file("{}/{}".format(dir_labels, f))) data_tmp_X = np.load( closest_file("{}/{}".format( dir_features, f)))[0:np.shape(data_tmp_Y)[0], :] X_all += [data_tmp_X.astype(np.float32)] Y_all += [data_tmp_Y[:, 0]] # print('data_tmp_Y: ',np.shape(data_tmp_Y)) print(f) print('X_all ', np.shape(X_all)) print('Y_all ', np.shape(Y_all)) # Make sure axes are correct (TxF not FxT for F=feat, T=time) print("Make sure axes are correct (TxF not FxT for F=feat, T=time)") print(X_all[0].shape, Y_all[0].shape) if X_all[0].shape[0] != Y_all[0].shape[0]: X_all = [x.T for x in X_all] self.n_features = X_all[0].shape[1] # print('n_features: ',self.n_features) self.n_classes = len(np.unique(np.hstack(Y_all))) # print('X_all',np.shape(X_all[0])) # Make sure labels are sequential # print('n_classes: ',self.n_classes) # print(np.shape(np.hstack(Y_all))) if self.n_classes != np.hstack(Y_all).max() + 1: Y_all = utils.remap_labels(Y_all) print("Reordered class labels") # Subsample the data if sample_rate > 1: # print('sample_rate',sample_rate) X_all, Y_all = utils.subsample(X_all, Y_all, sample_rate, dim=0) # print('X_all',np.shape(X_all[0])) #print('Y_all',np.shape(Y_all[0])) # ------------Train/test Splits--------------------------- # Split data/labels into train/test splits fid2idx = self.fid2idx(files_features) #print(file_train) #print(fid2idx) X_train = [X_all[fid2idx[f]] for f in file_train if f in fid2idx] X_test = [X_all[fid2idx[f]] for f in file_test if f in fid2idx] #print(len(X_train)) y_train = [Y_all[fid2idx[f]] for f in file_train if f in fid2idx] y_test = [Y_all[fid2idx[f]] for f in file_test if f in fid2idx] #print('Xtrain', np.shape(X_train)) #print('Ytrain', np.shape(y_train)) #print('Xtest', np.shape(X_test)) #print('Ytest', np.shape(y_train)) if len(X_train) == 0: print("Error loading data") return X_train, y_train, 
X_test, y_test
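# utils.remap_labels is assumed to reindex arbitrary label ids onto a
# contiguous 0..K-1 range across all sequences, as the sequential-label
# check above requires. A minimal sketch of that assumption:
import numpy as np

def remap_labels(Y_all):
    classes = np.unique(np.hstack(Y_all))
    mapping = {c: i for i, c in enumerate(classes)}
    return [np.array([mapping[y] for y in Y]) for Y in Y_all]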
import os
import sys
from re import sub

import utils

mergedsortfn = sys.argv[1]
origroibamfn = sys.argv[2]
GAIN_FINAL = sys.argv[3]

sampledbamfn = sub('.sorted.bam$', ".sampled.sorted.bam", mergedsortfn)

ratio = float(utils.countReads(mergedsortfn)) / float(
    utils.countReads(origroibamfn))
samplerate = round(0.5 / (ratio * 0.98), 2)

success = False
if samplerate < 1.0:
    utils.subsample(mergedsortfn, sampledbamfn, str(samplerate))
    success = True
elif samplerate < 1.5:
    print('sample rate is larger than 1: ' + str(samplerate))
    os.rename(mergedsortfn, sampledbamfn)
    success = True
else:
    print("not enough number of reads found for " + mergedsortfn)

if success:
    os.rename(sampledbamfn, GAIN_FINAL)
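# utils.countReads and utils.subsample are assumed to shell out to
# samtools; a minimal sketch of that assumption using the standard
# samtools CLI ("view -c" counts records, "view -s" subsamples by
# fraction):
import subprocess

def countReads(bamfn):
    out = subprocess.check_output(['samtools', 'view', '-c', bamfn])
    return int(out.strip())

def subsample(inbam, outbam, rate):
    # samtools interprets -s as seed.fraction; rate is a string like "0.42"
    with open(outbam, 'wb') as fh:
        subprocess.check_call(['samtools', 'view', '-b', '-s', rate, inbam],
                              stdout=fh)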
def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
               outputs_collections=None, scope=None):
    """Bottleneck residual unit variant with BN before convolutions.

    This is the full preactivation residual unit variant proposed in [2].
    See Fig. 1(b) of [2] for its definition. Note that we use here the
    bottleneck variant which has an extra bottleneck layer.

    When putting together two consecutive ResNet blocks that use this unit,
    one should use stride = 2 in the last unit of the first block.

    Args:
      inputs: A tensor of size [batch, height, width, channels].
      depth: The depth of the ResNet unit output.
      depth_bottleneck: The depth of the bottleneck layers.
      stride: The ResNet unit's stride. Determines the amount of downsampling
        of the unit's output compared to its input.
      rate: An integer, rate for atrous convolution.
      outputs_collections: Collection to add the ResNet unit output.
      scope: Optional variable_scope.

    Returns:
      The ResNet unit's output.
    """
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu,
                                 scope='preact')
        if depth == depth_in:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
        else:
            shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')

        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
                               scope='conv1')
        residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3,
                                            stride, rate=rate, scope='conv2')
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None,
                               scope='conv3')

        output = shortcut + residual

        return slim.utils.collect_named_outputs(outputs_collections, sc.name,
                                                output)
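# resnet_utils.subsample in TF-Slim reduces spatial resolution without
# convolving; for reference, it is essentially a 1x1 max-pool with the
# given stride (sketch written from memory of the TF-Slim source; verify
# against your vendored copy):
def subsample(inputs, factor, scope=None):
    if factor == 1:
        return inputs
    return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)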
def load_split(self, features, split, feature_type="X", sample_rate=1):
    # Setup directory and filenames
    dir_features = self.feature_path(features)

    # Get splits for this partition of data
    if self.activity is None:
        file_train = open(self.base_dir + "splits/{}/{}/train.txt".format(
            self.name, split)).readlines()
        file_test = open(self.base_dir + "splits/{}/{}/test.txt".format(
            self.name, split)).readlines()
    else:
        file_train = open(self.base_dir + "splits/{}/{}/{}/train.txt".format(
            self.name, self.activity, split)).readlines()
        file_test = open(self.base_dir + "splits/{}/{}/{}/test.txt".format(
            self.name, self.activity, split)).readlines()
    file_train = [f.strip() for f in file_train]
    file_test = [f.strip() for f in file_test]

    # Remove extension
    if "." in file_train[0]:
        file_train = [".".join(f.split(".")[:-1]) for f in file_train]
        file_test = [".".join(f.split(".")[:-1]) for f in file_test]

    self.trials_train = file_train
    self.trials_test = file_test

    # Get all features
    files_features = self.get_files(dir_features, split)
    X_all, Y_all = [], []
    for f in files_features:
        if "Split_" in os.listdir(dir_features)[-1]:
            data_tmp = sio.loadmat(
                closest_file("{}{}/{}".format(dir_features, split, f)))
        else:
            data_tmp = sio.loadmat(
                closest_file("{}/{}".format(dir_features, f)))
        X_all += [data_tmp[feature_type].astype(np.float32)]
        Y_all += [np.squeeze(data_tmp["Y"])]

    # Make sure axes are correct (TxF not FxT for F=feat, T=time)
    if X_all[0].shape[0] != Y_all[0].shape[0]:
        X_all = [x.T for x in X_all]
    self.n_features = X_all[0].shape[1]
    self.n_classes = len(np.unique(np.hstack(Y_all)))

    # Make sure labels are sequential
    if self.n_classes != np.hstack(Y_all).max() + 1:
        Y_all = utils.remap_labels(Y_all)
        print("Reordered class labels")

    # Subsample the data
    if sample_rate > 1:
        X_all, Y_all = utils.subsample(X_all, Y_all, sample_rate, dim=0)

    # ------------ Train/test splits ---------------------------
    # Split data/labels into train/test splits
    fid2idx = self.fid2idx(files_features)
    X_train = [X_all[fid2idx[f]] for f in file_train if f in fid2idx]
    X_test = [X_all[fid2idx[f]] for f in file_test if f in fid2idx]
    y_train = [Y_all[fid2idx[f]] for f in file_train if f in fid2idx]
    y_test = [Y_all[fid2idx[f]] for f in file_test if f in fid2idx]

    if len(X_train) == 0:
        print("Error loading data")

    return X_train, y_train, X_test, y_test
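# utils.subsample here is assumed to decimate each sequence in time by an
# integer rate along the given axis, keeping features and labels aligned
# (consistent with the `sample_rate > 1` guard above). A minimal sketch of
# that assumption:
import numpy as np

def subsample(X_all, Y_all, rate, dim=0):
    rate = int(rate)
    X_out = [np.take(X, np.arange(0, X.shape[dim], rate), axis=dim)
             for X in X_all]
    Y_out = [Y[::rate] for Y in Y_all]
    return X_out, Y_out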