Beispiel #1
0
def visual_delete_one_by_one(heap_type, title):
    """Fill a heap with random keys, then delete nodes one at a time,
    rendering the heap after every deletion.

    Relies on module-level names not shown here: ``random``, ``max_key``,
    ``node_amount``, ``delete_amount`` and ``visualize`` — TODO confirm
    they are defined at import time.

    :param heap_type: zero-arg callable producing an empty heap that
        supports ``insert(key)`` (returning a node with a ``.key``
        attribute) and ``delete(node)``.
    :param title: prefix used in every visualization title.
    """
    heap = heap_type()

    # generate random numbers and add them to the heap
    nodes = []
    values = random.sample(range(max_key), node_amount)
    for n in values:
        node = heap.insert(n)  # add node to heap
        nodes.append(node)
    visualize(
        heap,
        title=f"{title}: 0. Inserted values",
        highlight=min(nodes, key=lambda n: n.key),
    )

    # remove nodes from heap one by one
    # descending sort so that list.pop() yields the node with the smallest key
    nodes = sorted(nodes, key=lambda n: n.key, reverse=True)
    delete_next = nodes.pop()
    for i in range(1, delete_amount + 1):
        heap.delete(delete_next)
        # every third step delete the current minimum, otherwise a random node
        if i % 3 == 0:
            delete_next = nodes.pop()
        else:
            delete_next = nodes.pop(random.randrange(len(nodes)))

        # NOTE(review): the frame shows the heap *after* the deletion above,
        # while title/highlight refer to the node selected for the next pass.
        # Also assumes delete_amount < node_amount, else pop() raises — verify.
        visualize(
            heap,
            title=f"{title}: {i}. Deleting the node {delete_next.key}",
            highlight=delete_next,
        )
def visualize_voc_unet():
    """Visualize a pretrained UNet on the PascalVOC validation split,
    writing output under '<model name>_visualization/'."""
    from data.voc2012_loader_segmentation import PascalVOCSegmentation
    from torch.utils.data.dataloader import DataLoader
    from visualize.visualize import visualize
    from models.unet import UNet

    # Validation data in sequential order so runs are reproducible.
    val_split = PascalVOCSegmentation('val')
    loader = DataLoader(val_split, batch_size=16, shuffle=False, num_workers=0)

    # Restore pretrained weights before visualizing.
    net = UNet(outputs=21, name='voc_unet')
    net.load()

    out_dir = net.name + '_visualization/'
    visualize(net, loader, out_dir)
Beispiel #3
0
def visualize_voc_wass():
    """Visualize a pretrained WASS model on the PascalVOC validation
    split, writing output under '<model name>_visualization/'."""
    from data.voc2012_loader_segmentation import PascalVOCSegmentation
    from torch.utils.data.dataloader import DataLoader
    from visualize.visualize import visualize
    from models.wass import WASS

    # Validation data; no shuffling so output ordering is deterministic.
    val_split = PascalVOCSegmentation('val')
    loader = DataLoader(val_split, batch_size=32, shuffle=False, num_workers=6)

    # Restore pretrained weights before visualizing.
    net = WASS(name='voc_wass')
    net.load()

    out_dir = net.name + '_visualization/'
    visualize(net, loader, out_dir)
Beispiel #4
0
def visualize_voc_vgg16():
    """Visualize a pretrained VGG16-GAP model on the PascalVOC validation
    split, writing output under '<model name>_visualization/'."""
    from data.voc2012_loader_segmentation import PascalVOCSegmentation
    from torch.utils.data.dataloader import DataLoader
    from visualize.visualize import visualize
    from models.vgg16 import Vgg16GAP

    # Validation data; no shuffling so output ordering is deterministic.
    val_split = PascalVOCSegmentation('val')
    loader = DataLoader(val_split, batch_size=32, shuffle=False, num_workers=6)

    # Restore pretrained weights before visualizing.
    net = Vgg16GAP(outputs=21, name='voc_vgg16')
    net.load()

    out_dir = net.name + '_visualization/'
    visualize(net, loader, out_dir)
Beispiel #5
0
def main(args):
    """Dispatch each requested mode to its handler.

    :param args: parsed CLI namespace; ``args.mode`` is an iterable of
        mode names, each matched against the MODE_* constants.

    Exits the process via ``sys.exit`` with an error message when an
    unknown mode is encountered.
    """
    config = parse_configs(args)

    mode = args.mode
    for m in mode:
        if m == MODE_STATS:
            stats(config[MODE_STATS])
        elif m == MODE_PERTURB:
            perturb(config[MODE_PERTURB])
        elif m == MODE_CHECK:
            check(config[MODE_CHECK])
        elif m == MODE_TOLERANCE:
            tolerance(config[MODE_TOLERANCE])
        elif m == MODE_RUN_ENSEMBLE:
            run_ensemble(config[MODE_RUN_ENSEMBLE])
        elif m == MODE_VISUALIZE:
            visualize(config)
        else:
            # BUG FIX: report the offending mode `m` (not the whole `mode`
            # list) and include a placeholder for all six valid modes —
            # previously MODE_VISUALIZE was passed but silently dropped by
            # str.format because the template had one slot too few.
            sys.exit("invalid mode '{}' selected. must be '{}', '{}', '{}', '{}', '{}' or '{}'"
                     .format(m, MODE_PERTURB, MODE_CHECK, MODE_STATS, MODE_TOLERANCE, MODE_RUN_ENSEMBLE,
                             MODE_VISUALIZE))
Beispiel #6
0
def all(args):
    """Run the complete pipeline: filter -> assemble -> findmitoscaf ->
    annotate -> visualize, threading intermediate file paths through
    mutations of ``args``.

    NOTE(review): this function shadows the builtin ``all``; kept as-is
    because callers dispatch on this name.

    :param args: CLI namespace; reads flags like ``disable_filter``,
        ``disable_annotation``, ``disable_visualization`` and writes
        result paths (``fastafile``, ``pos_json``, ...) onto it.
    """

    # Go filtering
    #
    # Why I'm NOT using .gz ext here even I have implemented this:
    # 1. flate2 is slow, it takes much compressing data if single-threaded.
    # 2. plug in a SSD is much more easier than adding a CPU.
    # 3. Some method uses only plain text data, so you need an extra (de)compression
    #    but it means nothing in the process.
    # 4. Some further codes may only accept plain-text input, and I'm not adding
    #    support of gzip to it.

    args.cleanq1 = 'clean.1.fq'
    args.cleanq2 = 'clean.2.fq'
    if configurations.filter_rawdata.compress_output_in_all:
        args.cleanq1 += '.gz'
        args.cleanq2 += '.gz'

    # Raw-read filtering can be skipped from the CLI.
    if not args.disable_filter:
        args.fastq1, args.fastq2 = filter(args)

    args.fastafile = assemble(args)
    args.fastafile = findmitoscaf(args)

    if not args.disable_annotation:
        (args.pos_json, args.circular,
         args.annotated_cds, args.annotated_rna) = annotate(args)

        # Visualization is of no way if not annotated.
        args.circos_png, args.circos_svg = visualize(
            args) if not args.disable_visualization else (None, None)

    # Add command check if there's something further
    # If you wrapped the 'all' module in other task or workflow
    # the results will be retained since we don't know what you
    # want.
    # NOTE(review): if annotation was disabled, the attributes read below
    # (circos_png, pos_json, ...) were never set here — presumably they
    # exist as defaults on args; verify against the CLI parser.
    if args.__calling == 'all':
        def move_to_result(*files):
            # Best-effort move: silently skips anything that is not a file.
            for file in files:
                if path.isfile(str(file)):
                    os.rename(file, path.join(
                        args.result_dir, path.basename(file)))
        # Iteratively collects all the results generated in the whole process
        move_to_result(args.circos_png, args.circos_svg,
                       args.pos_json, args.fastafile,
                       args.annotated_cds, args.annotated_rna)
        logger.log(2, f'Results dumped at {args.result_dir}')
# Make the project root importable when this script is run directly.
sys.path.insert(0, os.path.abspath('../'))

from visualize.visualize import visualize
from torch.utils.data.dataloader import DataLoader
from data.voc2012_loaders import PascalVOCSegmentation
from models.vgg16_gap_feat import Vgg16GAP
from data import voc2012

# Visualize the epoch-1 checkpoint; the class count excludes background.
model = Vgg16GAP('voc_classification_epoch_1', voc2012.get_class_count() - 1)
model.load()
val_loader = DataLoader(PascalVOCSegmentation('val'),
                        batch_size=1,
                        shuffle=False,
                        num_workers=0)
visualize(model=model,
          dataloaders={'val': val_loader},
          palette=voc2012.color_map(256))

model = Vgg16GAP('voc_classification_epoch_5', voc2012.get_class_count() - 1)
model.load()
visualize(model=model,
          dataloaders={
              'val':
              DataLoader(PascalVOCSegmentation('val'),
                         batch_size=1,
                         shuffle=False,
                         num_workers=0),
          },
        """
        return self.data

    def getPruned(self):
        """Return the list of Data objects that survived pruning."""
        return self.pruned

    def removeRtree(self):
        """Delete the on-disk Rtree files ('<dim>d_index.data' and
        '<dim>d_index.index') created for this instance.

        Missing or inaccessible files are reported, not raised.
        """
        try:
            os.remove(str(self.dim) + 'd_index.data')
            os.remove(str(self.dim) + 'd_index.index')
            print('Files removed')
        except OSError:
            # BUG FIX: the bare `except:` also swallowed KeyboardInterrupt
            # and SystemExit; only file-system errors are expected here.
            print('No such files')


if __name__ == '__main__':
    # Build a 3-dimensional probabilistic skyline over [0, 100], radius 2.
    psky = prunePSky(3, drange=[0, 100], radius=2)
    psky.loadData('test_30_dim2_pos3_rad2_0100.csv')
    psky.createIndex(2)
    psky.pruning()
    psky.calculateUSky()
    # Render the original, pruned, and candidate point sets in turn.
    for fetch in (psky.getOrigin, psky.getPruned, psky.getCandidate):
        visualize(fetch(), 3, [0, 100])
    # Clean up the on-disk Rtree index files.
    psky.removeRtree()
Beispiel #9
0
def main():
    """Entry point: load data, build a Seq2seq model, then train,
    validate, predict, or visualize depending on the parsed CLI flags.

    Reads the module-level ``parser``/``args`` and the project modules
    (preprocess, load_embeddings, Seq2seq, predict, valid, train, Logger).
    """
    global args
    args = parser.parse_args()
    use_gpu = torch.cuda.is_available()

    # Load and process data
    time_data = time.time()
    SRC, TRG, train_iter, val_iter = preprocess(args.v, args.b)
    print('Loaded data. |TRG| = {}. Time: {:.2f}.'.format(
        len(TRG.vocab),
        time.time() - time_data))

    # Load embeddings if available
    # NOTE(review): hard-coded toggle — the random-init branch below is
    # currently unreachable; flip LOAD_EMBEDDINGS to exercise it.
    LOAD_EMBEDDINGS = True
    if LOAD_EMBEDDINGS:
        # Pre-computed embedding files are keyed by vocabulary size.
        np_de_file = 'scripts/emb-{}-de.npy'.format(len(SRC.vocab))
        np_en_file = 'scripts/emb-{}-en.npy'.format(len(TRG.vocab))
        embedding_src, embedding_trg = load_embeddings(SRC, TRG, np_de_file,
                                                       np_en_file)
        print('Loaded embedding vectors from np files')
    else:
        # Uniform random init in [-1, 1).
        embedding_src = (torch.rand(len(SRC.vocab), args.emb) - 0.5) * 2
        embedding_trg = (torch.rand(len(TRG.vocab), args.emb) - 0.5) * 2
        print('Initialized embedding vectors')

    # Create model
    # Special-token ids in the order Seq2seq expects: BOS, EOS, PAD, UNK.
    tokens = [TRG.vocab.stoi[x] for x in ['<s>', '</s>', '<pad>', '<unk>']]
    model = Seq2seq(embedding_src,
                    embedding_trg,
                    args.hs,
                    args.nlayers,
                    args.dp,
                    args.bi,
                    args.attn,
                    tokens_bos_eos_pad_unk=tokens,
                    reverse_input=args.reverse_input)

    # Load pretrained model
    if args.model is not None and os.path.isfile(args.model):
        model.load_state_dict(torch.load(args.model))
        print('Loaded pretrained model.')
    model = model.cuda() if use_gpu else model

    # Create weight to mask padding tokens for loss function
    # (zero weight => padding positions contribute nothing to the loss)
    weight = torch.ones(len(TRG.vocab))
    weight[TRG.vocab.stoi['<pad>']] = 0
    weight = weight.cuda() if use_gpu else weight

    # Create loss function and optimizer
    criterion = nn.CrossEntropyLoss(weight=weight)
    # Only optimize parameters that require gradients (e.g. frozen
    # embeddings stay fixed).
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=args.lr)
    # Reduce LR when the validation metric plateaus ('max': higher is better).
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           'max',
                                                           patience=30,
                                                           factor=0.25,
                                                           verbose=True,
                                                           cooldown=6)
    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10,13,16,19], gamma=0.5)

    # Create directory for logs, create logger, log hyperparameters
    # Each run gets a timestamped subdirectory under 'saves/'.
    path = os.path.join('saves',
                        datetime.datetime.now().strftime("%m-%d-%H-%M-%S"))
    os.makedirs(path, exist_ok=True)
    logger = Logger(path)
    logger.log('COMMAND ' + ' '.join(sys.argv), stdout=False)
    logger.log(
        'ARGS: {}\nOPTIMIZER: {}\nLEARNING RATE: {}\nSCHEDULER: {}\nMODEL: {}\n'
        .format(args, optimizer, args.lr, vars(scheduler), model),
        stdout=False)

    # Train, validate, or predict
    # Mutually exclusive run modes, checked in priority order; training is
    # the default when no other flag is given.
    start_time = time.time()
    if args.predict_from_input is not None:
        predict.predict_from_input(model, args.predict_from_input, SRC, TRG,
                                   logger)
    elif args.predict is not None:
        predict.predict(model, args.predict, args.predict_outfile, SRC, TRG,
                        logger)
    elif args.visualize:
        visualize.visualize(train_iter, model, SRC, TRG, logger)
    elif args.evaluate:
        valid.validate(val_iter, model, criterion, SRC, TRG, logger)
    else:
        train.train(train_iter, val_iter, model, criterion, optimizer,
                    scheduler, SRC, TRG, args.epochs, logger)
    logger.log('Finished in {}'.format(time.time() - start_time))
    return