# Experiment bootstrap: read the run configuration, set up file logging under
# ../logs/<experiment_name>.log, and build the metric-learning datasets.
# NOTE(review): this chunk is truncated — the final `if` has no body here.
args = utils.config()
print(args)
experiment_name = utils.get_experiment_name()
utils.make_directory("../logs")
# One log file per experiment; timestamped records at INFO level and above.
logging.basicConfig(filename=os.path.join(
    '../logs/{}.log'.format(experiment_name)),
    level=logging.INFO,
    format='%(asctime)s %(name)s %(levelname)s %(message)s')
logger = logging.getLogger(__name__)
utils.print_log('START with Configuration : {}'.format(args))
# make_directory presumably creates the path if missing and returns it — TODO confirm.
data_path = utils.make_directory(args.data_path)
partitions, partitions_train, partitions_tune = utils.get_partitions()
# Train/query/gallery splits of the same dataset root (retrieval-style evaluation).
if args.dataset == "Cars3D":
    train_data = Cars3D(root=args.data_path, mode="train", image_size=args.image_size)
    query_data = Cars3D(root=args.data_path, mode="query", image_size=args.image_size)
    gallery_data = Cars3D(root=args.data_path, mode="gallery", image_size=args.image_size)
else:
    raise ValueError("Provided dataset does not exist")
# Branch body continues beyond this chunk.
if args.sampling_method == "triplet":
# Approximate pattern matching (edit distance <= 1): exact-match the first
# half of the pattern via the Z-algorithm, then check whether the remainder
# of the text matches the second half within one edit.
# NOTE(review): this chunk is truncated — the insertion check is missing.
from z_algo import zbox_search
from ed import levenshtein_distance_lt_1
from utils import get_partitions

if __name__ == '__main__':
    text = "abdyabxdcyabcdz"
    pattern = "abcd"
    pattern_length = len(pattern)
    # get_partitions(pattern, 2) presumably yields two (start, stop) index
    # pairs splitting the pattern in half — TODO confirm against utils.
    first_half, second_half = get_partitions(pattern, 2)
    result = []  # (hit position, edit distance) candidates
    i, j = first_half
    ps = pattern[i:j]                     # exact-search piece (first half)
    ps2 = pattern[j:pattern_length]       # remainder verified approximately
    hits = zbox_search(text, ps)
    print("first half hits:", hits)
    for h in hits:
        print("hit:", h)
        # check for substitution
        # Same-length window after the matched piece; allow one substitution.
        print("match substitute")
        print(ps2, text[h + j:h + pattern_length])
        res, d = levenshtein_distance_lt_1(ps2, text[h + j:h + pattern_length])
        if res:
            result.append((h, d))
        # check for deletion
        # Window one char longer than ps2: a deletion from the text side.
        print(ps2, text[h + j:h + pattern_length + 1])
        res, d = levenshtein_distance_lt_1(ps2, text[h + j:h + pattern_length + 1])
        if res:
            # NOTE(review): distance is hard-coded to 1 here, unlike the
            # substitution branch which records `d` — verify this is intended.
            result.append((h, 1))
        # check for insertion
# Cross-validated deeSOM training on a one-class (positive/unlabeled) problem:
# per fold, hide the test positives' labels and fit the model on the rest.
# NOTE(review): this chunk is truncated — the loop body continues past the
# end (trainTime/testTime are filled and the test phase runs further down).
np.random.seed(1)
filename = "%s%s.csv" % (data_dir, dataset)
print("Loading features...")
data, labels = data_load(filename)
posind = np.where(labels == 1)[0]  # all known miRNAs
print("Done.")
startTime = time.time()
trainTime = []
testTime = []
for fold in range(nfolds):
    print("Training deeSOM with %s (fold %d of %d)" % (dataset, fold + 1, nfolds))
    # get_partitions presumably returns precomputed fold index arrays from
    # partitions_dir — TODO confirm signature against the project utils.
    train_ind, test_ind = get_partitions(partitions_dir, dataset, len(labels), fold)
    startFoldTime = time.time()
    # Train
    labels[posind] = 1  # Starting with all known genome-wide positives...
    print("original pos %d" % sum(labels))
    # ...remove the positive labels that will be used for testing
    labels[test_ind] = 0
    print("train pos %d" % sum(labels))
    toc = time.time()
    # Train
    deesom = DeeSOM(verbosity=1)
    deesom.fit(data, labels)
# Find every alignment of `pattern` in `text` with at most one mismatching
# character. The pattern is split into two pieces; any occurrence with <= 1
# mismatch must contain at least one piece intact, so exact hits of either
# piece (via the Z-algorithm) are the only candidates to verify.
from z_algo import zbox_search
from utils import get_partitions

if __name__ == '__main__':
    text = "bbcaefadcabcpqr"
    pattern = "abc"
    pattern_length = len(pattern)

    matches = set()
    for start, stop in get_partitions(pattern, 2):
        piece = pattern[start:stop]
        for hit in zbox_search(text, piece):
            # Count mismatches in the pattern prefix preceding the piece...
            errors = sum(
                text[hit - start + k] != pattern[k] for k in range(start)
            )
            # ...and in the pattern suffix following it.
            errors += sum(
                pattern[k] != text[hit + k]
                for k in range(stop, pattern_length)
            )
            if errors < 2:
                # Record the alignment's start in the text (dedup via set:
                # an exact occurrence is found through both pieces).
                matches.add((hit - start, errors))

    # Report 1-based positions in ascending order.
    for position, errors in sorted(matches, key=lambda m: m[0]):
        print(position + 1, errors)