Example 1
def train_main(config):
    bsz = config.batch_size

    tf.set_random_seed(config.seed)
    X = tf.placeholder(tf.float32,
                       shape=(bsz, config.input_steps, feature_dim))
    Y_label = tf.placeholder(tf.int32, [None, config.num_classes])
    Y_bbox = tf.placeholder(tf.float32, [None, 3])
    Index = tf.placeholder(tf.int32, [bsz + 1])
    LR = tf.placeholder(tf.float32)

    optimizer, loss, trainable_variables = \
        train_operation(X, Y_label, Y_bbox, Index, LR, config)

    model_saver = tf.train.Saver(var_list=trainable_variables, max_to_keep=2)

    sess = tf.InteractiveSession(config=tf.ConfigProto(
        log_device_placement=False))

    tf.global_variables_initializer().run()

    # initialize parameters or restore from previous model
    if not os.path.exists(models_dir):
        os.makedirs(models_dir)
    if not os.listdir(models_dir) or config.initialize:
        init_epoch = 0
        print("Initializing Network")
    else:
        init_epoch = int(config.steps)
        restore_checkpoint_file = join(models_dir,
                                       'model-ep-' + str(config.steps - 1))
        model_saver.restore(sess, restore_checkpoint_file)

    batch_train_dataX, batch_train_gt_label, batch_train_gt_info, batch_train_index = \
        get_train_data(config, mode, pretrain_dataset, True)
    num_batch_train = len(batch_train_dataX)

    for epoch in range(init_epoch, config.training_epochs):

        loss_info = []

        for idx in range(num_batch_train):
            feed_dict = {
                X: batch_train_dataX[idx],
                Y_label: batch_train_gt_label[idx],
                Y_bbox: batch_train_gt_info[idx],
                Index: batch_train_index[idx],
                LR: config.learning_rates[epoch]
            }
            _, out_loss = sess.run([optimizer, loss], feed_dict=feed_dict)

            loss_info.append(out_loss)

        print("Training epoch ", epoch, " loss: ", np.mean(loss_info))

        if epoch == config.training_epochs - 2 or epoch == config.training_epochs - 1:
            model_saver.save(sess, models_file_prefix, global_step=epoch)
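
This example depends on names defined elsewhere in its original module (feature_dim, models_dir, models_file_prefix, mode, pretrain_dataset, train_operation, get_train_data). A minimal, hypothetical driver with made-up config values might look like the sketch below; none of these values come from the original repository.

# Hypothetical driver for train_main(); every value here is an assumption.
from types import SimpleNamespace

config = SimpleNamespace(
    batch_size=16,
    seed=0,
    input_steps=64,               # assumed temporal window length
    num_classes=21,               # assumed number of classes
    initialize=True,              # train from scratch instead of restoring
    steps=0,                      # epoch to resume from when initialize is False
    training_epochs=30,
    learning_rates=[1e-3] * 30,   # one learning rate per epoch
)

# feature_dim, models_dir, models_file_prefix, mode and pretrain_dataset
# must already be defined as module-level globals before calling this.
train_main(config)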
Example 2
def train(args):
    global TRAIN_BATCH_SIZE, LEARNING_RATE

    TRAIN_BATCH_SIZE = args.batch if args.batch else TRAIN_BATCH_SIZE
    print("train batch size", TRAIN_BATCH_SIZE)
    LEARNING_RATE = args.lr if args.lr else LEARNING_RATE
    print("learning_rate", LEARNING_RATE)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    print(device, n_gpu)

    model, tokenizer = load_model_and_tokenizer(lm=args.lm,
                                                model_dir=args.model_dir)

    all_exs = []
    all_data = []

    if not args.num_examples:
        args.num_examples = [None] * len(args.data)
    else:
        args.num_examples += [None] * (len(args.data) - len(args.num_examples))
    print(args.data, args.num_examples)

    for data_source, num_exs in zip(args.data, args.num_examples):
        exs, data = get_train_data(data_source, tokenizer, lm=args.lm,
                                   num_examples=num_exs, mask=args.mask,
                                   distant_source=args.distant_source)
        all_exs.append(exs)
        all_data.append(data)

    '''
    if args.unsup:
      if args.unsup.endswith(".pkl"):
        inputs = pickle.load(open(args.unsup, 'rb'))
        u_exs = inputs['exs']
        u_data = inputs['old_data']
        u_new_data = inputs['new_data']
      else:
        assert args.unsup in set(["matres", "udst"])
        u_exs, u_data = get_train_data(args.unsup, lm=args.lm, num_examples=args.unsup_num_examples, mask=args.mask)
      print(len(u_exs), "unsup examples loaded")
      UNSUP_BATCH_SIZE = args.unsup_batch if args.unsup_batch else int(TRAIN_BATCH_SIZE/2)
      uda_dataset = UdaDataset(u_exs, UNSUP_BATCH_SIZE)
    '''

    OUTPUT_DIR = args.output_dir if args.output_dir else "models/scratch/"
    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    if len(all_exs) == 0:
        print("no dataset specified")
        return
    elif len(all_exs) == 1:
        print("one dataset specified")
        exs = all_exs[0]
        data = all_data[0]
    else:
        print("using multiple data sources")
        inputs = []
        for i in range(len(all_data[0].tensors)):
            inputs.append(torch.cat([d.tensors[i] for d in all_data]))

        exs = list(chain(*all_exs))
        data = TensorDataset(*inputs)


    data_sampler = RandomSampler(data)
    dataloader = DataLoader(data, sampler=data_sampler,
                            batch_size=TRAIN_BATCH_SIZE)

    print(len(data), len(exs), "examples loaded")

    num_train_optimization_steps = int(
        len(data) / TRAIN_BATCH_SIZE / GRADIENT_ACCUMULATION_STEPS) * NUM_TRAIN_EPOCHS
    print(num_train_optimization_steps, "optimization steps")
    num_warmup_steps = int(WARMUP_PROPORTION * num_train_optimization_steps)

    model.to(device)

    # Prepare optimizer
    param_optimizer = list(model.named_parameters())

    # hack to remove the pooler, which is not used here and would otherwise
    # produce None gradients that break apex
    param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]

    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]

    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=LEARNING_RATE,
                      correct_bias=False)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_train_optimization_steps)  # PyTorch scheduler

    if args.serialize:
        inputs = {"exs": exs, "data": data}
        pickle.dump(inputs, open(OUTPUT_DIR + "/inputs.pkl", 'wb'))

    logfile = open(OUTPUT_DIR + "/log.txt", "w+")
    print(args, file=logfile)
    print(len(data), len(exs), "examples loaded", file=logfile)
    count_labels(exs, file=logfile)
    count_labels(exs)
    print("learning_rate", LEARNING_RATE, file=logfile)

    global_step = 0
    num_epochs = args.epochs if args.epochs is not None else int(NUM_TRAIN_EPOCHS)
    if num_epochs == 0:
        exit()
    model.train()
    exs_cpy = exs

    for ep in trange(num_epochs, desc="Epoch"):
        last_loss_kldiv = 0
        for step, batch in enumerate(tqdm(dataloader,
                          desc="Iteration " + str(ep),
                          disable=args.disable_tqdm)):
            bbatch = tuple(t.to(device) for t in batch)
            loss, _, _ = model(*bbatch)

            '''
            if args.unsup:
                loss_kldiv = unsup_loss(model)
                if loss_kldiv:
                    last_loss_kldiv = loss_kldiv.item()
                    loss += loss_kldiv
            '''

            loss.backward()
            if step % 100 == 0:
                print("Loss: %.3f at step %d" % (loss.item(), step), file=logfile)
                # if args.unsup and last_loss_kldiv:
                #  print("Unsup Loss: %.3f at step %d" %(last_loss_kldiv, step), file=logfile)
            optimizer.step()
            scheduler.step()
            model.zero_grad()
            global_step += 1

        # Save the trained model, configuration and tokenizer once per epoch
        model_output_dir = OUTPUT_DIR + "/output_" + str(ep) + "/"
        if not os.path.exists(model_output_dir):
            os.makedirs(model_output_dir)
        model.save_pretrained(model_output_dir)
        tokenizer.save_pretrained(model_output_dir)
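
train() reads all of its settings from an argparse-style args object. The parser below is only a sketch inferred from the attribute accesses in the example; flag defaults are placeholders, not the original project's values.

# Hypothetical CLI wrapper for train(); defaults are placeholders.
import argparse

def build_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", nargs="+", default=[])        # one or more training sources
    parser.add_argument("--num_examples", nargs="+", type=int)  # optional per-source caps
    parser.add_argument("--lm", default="bert")                 # language-model name
    parser.add_argument("--model_dir", default=None)            # pretrained weights to load
    parser.add_argument("--mask", action="store_true")
    parser.add_argument("--distant_source", default=None)
    parser.add_argument("--output_dir", default=None)
    parser.add_argument("--serialize", action="store_true")     # dump exs/data to inputs.pkl
    parser.add_argument("--batch", type=int, default=None)
    parser.add_argument("--lr", type=float, default=None)
    parser.add_argument("--epochs", type=int, default=None)
    parser.add_argument("--disable_tqdm", action="store_true")
    return parser

if __name__ == "__main__":
    train(build_parser().parse_args())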
Example 3
def train():
    X_train, y_train = get_train_data()
    X_test, y_test = get_valid_data()
    # data generators
    train_gen = ImageDataGenerator(rescale=1.0 / 255,
                                   horizontal_flip=True,
                                   width_shift_range=4.0 / 32.0,
                                   height_shift_range=4.0 / 32.0)
    test_gen = ImageDataGenerator(rescale=1.0 / 255)
    # train_gen = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True, zca_whitening=True, horizontal_flip=True,)
    # test_gen = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True, zca_whitening=True)
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    # load network
    model = ShuffleNet_V2()
    opt = SGD(0.01, momentum=0.9)
    #model.compile(Adam(0.001), "categorical_crossentropy", ["accuracy"])
    model.compile(optimizer=opt,
                  loss="categorical_crossentropy",
                  metrics=['accuracy'])
    #model.compile(SGD(0.01, momentum = 0.9), "categorical_crossentropy", ["acc", "top_k_categorical_accuracy"])
    model.summary()

    # set GPU
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)
    session.run(tf.global_variables_initializer())
    KTF.set_session(session)

    # training settings
    batch_size = 128
    scheduler = LearningRateScheduler(lr_scheduler)
    hist = History()

    start_time = time.time()
    #model.fit_generator(train_gen.flow(X_train, y_train, batch_size, shuffle=True),
    #                    steps_per_epoch=X_train.shape[0]//batch_size,
    #                    validation_data=test_gen.flow(X_test, y_test, batch_size, shuffle=False),
    #                    validation_steps=X_test.shape[0]//batch_size,
    #                    callbacks=[scheduler, hist], max_queue_size=5, epochs=100)
    model.fit_generator(train_gen.flow(X_train,
                                       y_train,
                                       batch_size,
                                       shuffle=True),
                        steps_per_epoch=X_train.shape[0] // batch_size,
                        validation_data=test_gen.flow(X_test,
                                                      y_test,
                                                      batch_size,
                                                      shuffle=False),
                        validation_steps=X_test.shape[0] // batch_size,
                        callbacks=[scheduler, hist],
                        max_queue_size=5,
                        epochs=50)

    elapsed = time.time() - start_time
    print('training time', elapsed)

    history = hist.history
    history["elapsed"] = elapsed

    with open("shuffle_v2_002_glp.pkl", "wb") as fp:
        pickle.dump(history, fp)
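
LearningRateScheduler(lr_scheduler) refers to an lr_scheduler function defined elsewhere in the original script; its exact schedule is not shown, so the step decay below is only an illustrative assumption consistent with SGD(0.01) and 50 epochs.

# Hypothetical learning-rate schedule; the real one may differ.
def lr_scheduler(epoch):
    lr = 0.01
    if epoch >= 25:
        lr *= 0.1      # drop by 10x halfway through training
    if epoch >= 40:
        lr *= 0.1      # drop again near the end
    return lr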
Example 4
def loadRawData():
    '''
    Load global, per-system variables.
    Each variable is a list with one value per system.
    Coulomb matrices follow the same indexing order.
    '''
    # Spacegroup of the systems
    global spacegrp
    # Number of atoms
    global Natoms
    # Percentage of Al
    global pc_al
    # Percentage of Ga
    global pc_ga
    # Percentage of In
    global pc_in
    # Lattice vector lengths
    global lv_alpha
    global lv_beta
    global lv_gamma
    # Lattice angles (degrees)
    global lvadeg
    global lvbdeg
    global lvgdeg
    # Formation energies LABELS
    global Ef
    # Band gap energies LABELS
    global Eg
    # Training set atom coordinates
    global xyz_Train
    # Training set atom elements (str)
    global elements_Train
    # Training set lattice vectors
    global lattices_Train

    filename = hyppar.datapath + 'data/train.csv'
    spacegrp, Natoms, pc_al, pc_ga, pc_in, lv_alpha, lv_beta, lv_gamma, lvadeg, lvbdeg, lvgdeg, Ef, Eg = load_data.get_train_data(
        filename)
    xyz_Train, elements_Train, lattices_Train = load_data.get_geometry(
        hyppar.Ndata, hyppar.datapath + 'data')
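
load_data.get_train_data is not shown here. A minimal pandas-based sketch is given below; it assumes the first column of train.csv is an id and that the remaining thirteen columns appear in exactly the order they are unpacked above, which is an assumption about the dataset layout rather than a documented fact.

# Hypothetical stand-in for load_data.get_train_data; column order is assumed.
import pandas as pd

def get_train_data(filename):
    df = pd.read_csv(filename)
    # Skip the id column and return the 13 remaining columns as lists,
    # in the same order as the unpacking in loadRawData().
    return tuple(df.iloc[:, i].tolist() for i in range(1, 14))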