def main():
    args = get_argparser()

    # class names are read from the config file; args.num_classes is expected
    # to include one extra (background) class, hence the "+ 1" in the assert
    classes = utils.parse_config(args.config_path)
    print(len(classes), args.num_classes, classes)
    assert len(classes) + 1 == args.num_classes, \
        "Number of classes in the config and the num_classes argument do not match"

    # use our dataset and defined transformations

    dataset = loader.CellDataset(args.root_dir,
                                 utils.get_transform(args.model, train=True),
                                 args.labels_type, args.model, classes)
    dataset_test = loader.CellDataset(args.root_dir,
                                      utils.get_transform(args.model,
                                                          train=False),
                                      args.labels_type,
                                      args.model,
                                      classes,
                                      mode="Test")

    indices = torch.arange(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset,
                                      indices[:int(len(indices) * 0.9)])
    dataset_test = torch.utils.data.Subset(dataset_test,
                                           indices[int(len(indices) * 0.9):])
    print("Images in Test set", len(dataset_test), "Images in Train set ",
          len(dataset))
    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=4,
                                              collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(dataset_test,
                                                   batch_size=1,
                                                   shuffle=True,
                                                   num_workers=4,
                                                   collate_fn=utils.collate_fn)

    model = models.get_model(args.model, args.weight_path, args.num_classes,
                             args.max_instances, args.maskrcnn_backbone)

    device = "cuda:0" if args.cuda else "cpu"
    model.to(device)

    # optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
    #                             momentum=0.9, weight_decay=0.0005)
    print("\n\nStarting Training of ", args.model, "\n\n")
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    model_trainer = trainer.TrainModel(model, optimizer, args.model, device)
    for epoch in range(args.epochs):
        model_trainer.train(epoch, data_loader, data_loader_test)

    print("That's it!")
Example #2
def argument_parser(sys_argv):
    # ARGUMENT HANDLING
    parser = argparse.ArgumentParser(prog='Test models')

    parser.add_argument('--data-config',
                        help="configuration file path",
                        required=True,
                        type=str)
    parser.add_argument('--model-folder',
                        help="Folder where the model is available",
                        required=True,
                        type=str)
    parser.add_argument('--results-folder',
                        help="where to store probabilities of each class",
                        type=str)
    parser.add_argument('--metrics',
                        nargs='+',
                        help="Metrics solicited",
                        default=None,
                        type=str)
    parser.add_argument('--datasets',
                        nargs='+',
                        help="Datasets to be evaluated",
                        default=None,
                        type=str)
    parser.add_argument('--overload',
                        help="Pairs of parameters to overload",
                        nargs='+',
                        type=str)
    parser.add_argument('--cnn-out', action='store_true')
    args = parser.parse_args(sys_argv)

    qrel_file_flag = False
    if args.metrics:
        assert all(metric in AVAILABLE_METRICS for metric in args.metrics), \
            "Supported metrics %s" % (" ".join(AVAILABLE_METRICS))

        if 'NDCG20' in args.metrics or 'ERR20' in args.metrics:
            qrel_file_flag = True

    config = parse_config(args.data_config)
    if args.overload:
        config = edit_config(config, args.overload)

    # Remove train and dev from data config
    config['datasets'] = {
        dset: config['datasets'][dset]
        for dset in config['datasets'] if dset not in ['train', 'dev']
    }

    # Force test to run on CPU
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    return config, args, qrel_file_flag
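
# Illustrative usage (file names are hypothetical): the test entry point would
# pass the CLI tail straight through, e.g.
#
#   config, args, qrel_file_flag = argument_parser([
#       '--data-config', 'data.yaml',
#       '--model-folder', 'models/run1',
#       '--metrics', 'NDCG20', 'ERR20',
#   ])
#
# After the call, config['datasets'] holds only the evaluation splits ('train'
# and 'dev' are stripped above) and the process is pinned to the CPU.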
Example #3
def initialize_pairs(root, binary):
    dataset = list()
    sets = os.listdir(root)
    sets = [i for i in sets if "set" in i]
    if binary == 2:
        classes = {"cell": 1}
    else:
        classes = utils.parse_config("config.txt")
        assert len(classes) + 1 == binary, \
            "Number of classes in config.txt does not match the requested class count"
    assert len(sets) > 1, "Fewer than two set directories found inside the data directory"
    if args.convert_to_coco:
        os.makedirs("./temp", exist_ok=True)

    for a_set in sets:
        target_json = {"shapes": []}
        set_path = os.path.join(root, a_set)
        tifs = glob.glob(os.path.join(set_path, "raw.tif"))

        pngs = glob.glob(os.path.join(set_path, "*png"))
        assert len(tifs) == 1, "Raw tif not found"
        masks = list()
        for key in classes:
            class_name = key + ".png" if binary != 2 else "labeled.png"
            class_label_path = os.path.join(set_path, class_name)
            assert class_label_path in pngs, class_name+" Not Found"
            mask = cv2.imread(class_label_path, -1)
            polygons = mask_to_poly(mask, binary)

            for poly in polygons:
                target_json["shapes"].append(
                    {"label": key, "points": poly})

        print ("Converting", tifs[0])
        with open(tifs[0].replace("tif", "json"), 'w') as f:
            json.dump(target_json, f)
        if args.convert_to_coco:
            im_path = "./temp/" + \
                "_".join(tifs[0].split("/")[-2:])
            target_json["imagePath"] = im_path.split("/")[-1]
            with open(im_path.replace("tif", "json"), "w") as f:
                json.dump(target_json, f)
            im = cv2.imread(tifs[0])
            cv2.imwrite(im_path.replace("json", "tif"), im)

    print ("\nConverting to COCO...")
    json2coco.process(
        **{"labels": "config.txt", "input_dir": "./temp", "output_dir": args.out_dir})
    if os.path.exists("./temp"):
        shutil.rmtree("./temp")
Example #4
    def __init__(self):
        super(GPT2Encoder, self).__init__(embed_size=768)
        self.codec = get_codec()
        self.gpt2_config = parse_config()
        self.gpt2_model = GPT2(self.gpt2_config)

        if torch.cuda.is_available():
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')
        if not os.path.exists('gpt2-pytorch_model.bin'):
            print("Downloading GPT-2 checkpoint...")
            url = 'https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-pytorch_model.bin'
            r = requests.get(url, allow_redirects=True)
            with open('gpt2-pytorch_model.bin', 'wb') as f:
                f.write(r.content)

        self.gpt2_model = load_weight(
            self.gpt2_model,
            torch.load('gpt2-pytorch_model.bin', map_location=device))
        self.gpt2_model = self.gpt2_model.to(device)
        self.gpt2_model.eval()
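
# Illustrative usage: the encoder is built once and reused; the forward
# signature depends on the (elided) base Encoder class.
#
#   encoder = GPT2Encoder()   # downloads gpt2-pytorch_model.bin on first use
#   # embeddings = encoder(token_ids)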
Example #5
def argument_parser(sys_argv):
    # ARGUMENT HANDLING
    parser = argparse.ArgumentParser(prog='Train models')
    parser.add_argument('--data-config',
                        help="Data configuration file path",
                        required=True,
                        type=str)
    parser.add_argument('--model-config',
                        help="Model configuration file path",
                        required=True,
                        type=str)
    parser.add_argument('--model-folder',
                        help="Path to save model's outputs",
                        required=True,
                        type=str)
    parser.add_argument('--metrics',
                        help="Metrics to calculate while training model",
                        default=['ERR20', 'NDCG20'],
                        nargs='+',
                        type=str)
    parser.add_argument('--overload',
                        help="Pairs of parameters to overload",
                        nargs='+',
                        type=str)
    parser.add_argument('--round-robin',
                        help="If set, run every train combination "
                             "(each train folder is used as validation once)",
                        action='store_true')
    args = parser.parse_args(sys_argv)

    data_config = parse_config(args.data_config)
    model_config = parse_config(args.model_config)

    if args.round_robin:
        assert 'dev' not in data_config['datasets'], \
            "When using --round-robin, dev can't be specified, put all files under 'train'"
        assert len(data_config['datasets']['train']) >= 2, \
            "Please provide more than 1 file for train when using --round-robin"

        # Get train combinations (leave 1 out for dev)
        train_combinations = []
        aux_dict = {}
        for i, dev_file in enumerate(data_config['datasets']['train']):
            train_combinations.append(([
                'train_%d' % (x + 1)
                for x in range(len(data_config['datasets']['train'])) if x != i
            ], ['dev_%d' % (i + 1)]))
            aux_dict['train_%d' % (i + 1)] = [dev_file]
            aux_dict['dev_%d' % (i + 1)] = [dev_file]

            # For TREC qrel file
            model_config['val_qrel_file_%d' % i] = dev_file

            # For retraining
            if model_config['retrain']:
                model_config['train_qrel_files_%d' % i] = \
                    [d for x, d in enumerate(data_config['datasets']['train']) if x != i]

        # Replace with aux_dict
        data_config['datasets'] = aux_dict
    else:
        # Retain only train and dev
        data_config['datasets'] = {
            'train': data_config['datasets']['train'],
            'dev': data_config['datasets']['dev']
        }
        train_combinations = [(['train'], ['dev'])]

        # For TREC qrel file
        assert len(data_config['datasets']['dev']) == 1, \
            "Only provide one QREL file for dev"
        model_config['val_qrel_file_0'] = data_config['datasets']['dev'][0]

        if model_config['retrain']:
            model_config['train_qrel_files_0'] = data_config['datasets'][
                'train']

    # Pass some keys of model_config to data_config
    data_config['sim_matrix_config'] = model_config['sim_matrix_config']
    data_config['query_idf_config'] = model_config['query_idf_config']
    data_config['num_negative'] = model_config['num_negative']
    data_config['use_description'] = model_config['use_description']
    data_config['use_topic'] = model_config['use_topic']
    data_config['custom_loss'] = model_config['custom_loss']
    if model_config['retrain']:
        data_config['retrain_mode'] = model_config['retrain_mode']

    # if model_config['sim_matrix_config']['use_static_matrices'] and model_config['top_k'] != 0:
    #     raise Exception("'use_embedding_layer' is set to True but 'top_k' != 0 and 'use_static_matrices' set to True, which makes embeddings useless")

    if 'embeddings_path' in data_config:
        model_config['embeddings_path'] = data_config['embeddings_path']
    model_config['model_folder'] = args.model_folder

    for metric in args.metrics + [model_config['metric']]:
        assert metric in AVAILABLE_METRICS, \
            "Unavailable metric %s" % metric

    config = {
        'data': data_config,
        'model': model_config,
        'monitoring_metric': model_config['metric'],
        'metrics': args.metrics,
        'num_gpus': 1
    }

    if args.overload:
        config = edit_config(config, args.overload)
        if 'gpu_device' in config['model']:
            # Bruteforced for Keras/TF
            if not isinstance(config['model']['gpu_device'], tuple):
                config['model']['gpu_device'] = [config['model']['gpu_device']]
            os.environ["CUDA_VISIBLE_DEVICES"] = "%s" % ','.join(
                str(x) for x in config['model']['gpu_device'])
            config['num_gpus'] = len(config['model']['gpu_device'])

    return config, train_combinations
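
# Worked example of the --round-robin branch above (file names illustrative):
# with data_config['datasets']['train'] == ['A.qrel', 'B.qrel', 'C.qrel'],
#
#   train_combinations == [(['train_2', 'train_3'], ['dev_1']),
#                          (['train_1', 'train_3'], ['dev_2']),
#                          (['train_1', 'train_2'], ['dev_3'])]
#
# and data_config['datasets'] maps both 'train_i' and 'dev_i' to the i-th
# original file, so fold 0 trains on B and C and validates on A.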
Example #6
        model = get_model(config['arch'])

    trainer = eval(config['arch']['algorithm'])(
        config=config,
        model=model,
        criterion=criterion,
        train_loader=train_loader,
        post_process=post_p,
        metric=metric,
        validate_loader=validate_loader,
        converter=converter)
    trainer.train()


if __name__ == '__main__':
    import os
    import sys

    project = 'tianrang-ocr'  # project root directory
    sys.path.append(os.getcwd().split(project)[0] + project)

    from utils.utils import parse_config

    args = init_args()
    assert os.path.exists(args.config_file)
    config = anyconfig.load(open(args.config_file, 'rb'))
    if 'base' in config:
        config = parse_config(config)
    mapping = Dict(config)
    main(mapping)
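
# Note on the dispatch above: eval(config['arch']['algorithm']) instantiates a
# trainer class named in the config. An eval-free sketch (class names here are
# hypothetical) would use an explicit registry instead:
#
#   TRAINERS = {'DetTrainer': DetTrainer, 'RecTrainer': RecTrainer}
#   trainer_cls = TRAINERS[config['arch']['algorithm']]
#   trainer = trainer_cls(config=config, model=model, criterion=criterion, ...)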
Example #7
print "\nsnom multicast PnP Provisioning Server (mcserv)\n"
print "(c) 2008-2009 snom technology AG\n"
print "=" * 80

config['prov_uri'] = options.prov_uri

# Configuration file has been provided
#
# NOTE: Local (command-line) options overwrite config file
# 
configuration = ConfigParser.ConfigParser()
if options.config:
    print "Reading configuration from %s" % options.config
    configuration.read(options.config) # Fixme: make sure the file exists
    (config, msconfig) = utils.parse_config(configuration, options)

if not config['multistage']:
    print "Provisioning URI is %s\n" % config['prov_uri']

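# Join the SIP PnP multicast group (224.0.1.75) and listen for provisioning
# requests from the phones on the configured local port.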
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.bind(('224.0.1.75', options.local_port))
mreq = struct.pack('4sl', socket.inet_aton('224.0.1.75'), socket.INADDR_ANY)
sock.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, mreq)

if options.local_ip:
    config['local_ip'] = options.local_ip
elif not config['local_ip']:
    config['local_ip'] = utils.get_ip_address()
Example #8
        dropout=args.dropout).to(device)

logging.info('\nNetwork architecture:\n{}'.format(str(model)))

optim_handle = {
    'adam': optim.Adam,
    'sgd': optim.SGD,
    'adagrad': optim.Adagrad,
    'rmsprop': optim.RMSprop
}
optimizer = optim_handle[args.optim](model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
loss_fun_handle = {'f1': f1_loss, 'bce': nn.BCELoss(), 'hamming': hamming_loss}
criterion = loss_fun_handle[args.loss_fun]
logging.info(parse_config(args.__dict__) + '\n')


def train(epoch):
    t = time.time()

    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = criterion(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    acc_train = accuracy(output[idx_train], labels[idx_train])
    f1_train = f1_score(output[idx_train], labels[idx_train])