# Parse the run configuration and echo it to stdout.
args = utils.config()
print(args)

experiment_name = utils.get_experiment_name()

# File logging goes to <log_dir>/<experiment_name>.log. The directory is
# prepared first because logging's file handler does not create missing
# parent directories (utils.make_directory is presumably what ensures it
# exists -- verify against utils).
log_dir = "../logs"
utils.make_directory(log_dir)
logging.basicConfig(
    # Join directory and file name as separate components; the previous
    # single-argument os.path.join() call was a no-op.
    filename=os.path.join(log_dir, '{}.log'.format(experiment_name)),
    level=logging.INFO,
    format='%(asctime)s %(name)s %(levelname)s %(message)s')
logger = logging.getLogger(__name__)
utils.print_log('START with Configuration : {}'.format(args))

data_path = utils.make_directory(args.data_path)
partitions, partitions_train, partitions_tune = utils.get_partitions()

# Cars3D is the only dataset this script knows how to build; fail fast on
# anything else before constructing any split.
if args.dataset != "Cars3D":
    raise ValueError("Provided dataset does not exist")

# One dataset object per split, all sharing the same root and image size.
train_data = Cars3D(root=args.data_path,
                    mode="train",
                    image_size=args.image_size)
query_data = Cars3D(root=args.data_path,
                    mode="query",
                    image_size=args.image_size)
gallery_data = Cars3D(root=args.data_path,
                      mode="gallery",
                      image_size=args.image_size)

if args.sampling_method == "triplet":
Beispiel #2
0
from z_algo import zbox_search
from ed import levenshtein_distance_lt_1
from utils import get_partitions

# Demo script: look for occurrences of `pattern` in `text` by first locating
# exact hits of the pattern's first partition and then comparing the rest of
# the pattern against the text that follows each hit.
if __name__ == '__main__':
    text = "abdyabxdcyabcdz"
    pattern = "abcd"
    pattern_length = len(pattern)
    # get_partitions is unpacked into two items here; each is then used as an
    # (i, j) pair of slice bounds into `pattern`.
    # NOTE(review): second_half is not used in the visible part of the script.
    first_half, second_half = get_partitions(pattern, 2)
    # Collected as (hit position, distance) tuples.
    result = []
    i, j = first_half
    # Seed: the slice of the pattern that is searched for exactly.
    ps = pattern[i:j]

    # Remainder of the pattern after the seed, verified approximately below.
    ps2 = pattern[j:pattern_length]
    hits = zbox_search(text, ps)
    print("first half hits:", hits)
    for h in hits:
        print("hit:", h)
        # check for substitution
        # Compare the remainder against a text window of the same length.
        # NOTE(review): the window starts at h + j, which lines up with the
        # pattern only if the seed starts at i == 0 -- confirm that
        # get_partitions always returns a first partition starting at 0.
        print("match substitute")
        print(ps2, text[h + j:h + pattern_length])
        res, d = levenshtein_distance_lt_1(ps2, text[h + j:h + pattern_length])
        if res:
            result.append((h, d))
        # check for deletion
        # Same comparison, but against a text window one character longer.
        print(ps2, text[h + j:h + pattern_length + 1])
        res, d = levenshtein_distance_lt_1(ps2,
                                           text[h + j:h + pattern_length + 1])
        if res:
            # NOTE(review): records a constant 1 instead of the returned `d`,
            # unlike the substitution branch above -- confirm this is intended.
            result.append((h, 1))
        # check for insertion
Beispiel #3
0
# Fix the NumPy global RNG seed so repeated runs draw the same random numbers.
np.random.seed(1)

# The feature file name is data_dir and dataset concatenated directly, plus
# ".csv"; no path separator is inserted, so data_dir presumably already ends
# with one -- verify where data_dir is defined.
filename = "%s%s.csv" % (data_dir, dataset)
print("Loading features...")
data, labels = data_load(filename)
# Remember which entries are labelled 1 before the loop starts: the loop
# overwrites `labels` in place and uses these indices to restore it each fold.
posind = np.where(labels == 1)[0]  # all known miRNAs
print("Done.")
startTime = time.time()

# Per-fold timing lists (not filled in within the visible part of the script).
trainTime = []
testTime = []

# One iteration per fold; the fold index selects the train/test partition.
for fold in range(nfolds):
    print("Training deeSOM with %s (fold %d of %d)" %
          (dataset, fold + 1, nfolds))
    train_ind, test_ind = get_partitions(partitions_dir, dataset, len(labels),
                                         fold)

    startFoldTime = time.time()

    # Prepare the training labels for this fold.
    # Reset first: the previous fold zeroed some of these entries in place.
    labels[posind] = 1  # Starting with all known genome-wide positives...
    # sum(labels) is printed as the number of positives; this assumes labels
    # only holds 0/1 values -- TODO confirm against data_load.
    print("original pos %d" % sum(labels))
    # ...remove the positive labels that will be used for testing
    labels[test_ind] = 0
    print("train pos %d" % sum(labels))

    toc = time.time()
    # Train: fit receives the full `data` together with the labels in which
    # the test entries were zeroed above.
    deesom = DeeSOM(verbosity=1)
    deesom.fit(data, labels)
Beispiel #4
0
from z_algo import zbox_search
from utils import get_partitions

def count_mismatches(text, pattern, start):
    """Return the Hamming distance between ``pattern`` and a window of ``text``.

    Args:
        text: String being searched.
        pattern: String aligned against ``text``.
        start: Index in ``text`` at which ``pattern[0]`` is aligned.

    Returns:
        The number of positions at which ``pattern`` differs from
        ``text[start:start + len(pattern)]``, or ``None`` when that window
        does not lie completely inside ``text``.
    """
    end = start + len(pattern)
    # A negative start would silently wrap around to the end of ``text`` and an
    # overlong window would run off the end; neither is a real alignment.
    if start < 0 or end > len(text):
        return None
    return sum(p != t for p, t in zip(pattern, text[start:end]))


# Demo script: report every alignment of `pattern` in `text` with fewer than
# two mismatches. Each partition of the pattern is searched for exactly and
# every hit is then verified against the whole pattern.
if __name__ == '__main__':
    text = "bbcaefadcabcpqr"
    pattern = "abc"
    partitions = get_partitions(pattern, 2)
    matches = set()  # a set, because different seeds can find the same alignment
    for i, j in partitions:
        for h in zbox_search(text, pattern[i:j]):
            # The seed pattern[i:j] was found at text[h], so pattern[0] lines
            # up with text[h - i]. Every pattern position k must therefore be
            # compared with text[h - i + k]; the earlier code used text[h + k]
            # for the part after the seed, which is only right when i == 0.
            start = h - i
            # The whole window is counted; the seed itself contributes no
            # mismatches provided zbox_search reports exact matches only.
            mismatch = count_mismatches(text, pattern, start)
            if mismatch is not None and mismatch < 2:
                matches.add((start, mismatch))

    result = sorted(matches)
    for start, mismatch in result:
        # Positions are printed 1-based.
        print(start + 1, mismatch)