Code Example #1

# Imports needed by this example; the helpers (get_props, get_loaders,
# ResNet, BasicBlock, Bottleneck, densenet121/161/169/201) and the
# module-level globals (feat_nums, e_sizes, num_loads, epochs_per_load)
# are assumed to come from the surrounding project.
import pickle
import random
import time

import numpy as np
import torch
import torch.nn as nn

def train(working_dir, grid_size, learning_rate, batch_size, num_walks,
          model_type, fn):
    train_props, val_props, test_props = get_props(working_dir,
                                                   dtype=np.float32)
    means_stds = np.loadtxt(working_dir + "/means_stds.csv",
                            dtype=np.float32,
                            delimiter=',')

    # Filter out redundant QM8 properties (keep 12 of the 16 targets)
    if train_props.shape[1] == 16:
        filtered_labels = list(range(0, 8)) + list(range(12, 16))
        train_props = train_props[:, filtered_labels]
        val_props = val_props[:, filtered_labels]
        test_props = test_props[:, filtered_labels]

        means_stds = means_stds[:, filtered_labels]
    # Model constructors keyed by name; the ResNet variants differ only in
    # block type and layer counts, the DenseNet variants only in constructor.
    resnet_cfgs = {
        "resnet18": (BasicBlock, [2, 2, 2, 2]),
        "resnet34": (BasicBlock, [3, 4, 6, 3]),
        "resnet50": (Bottleneck, [3, 4, 6, 3]),
    }
    densenet_ctors = {
        "densenet121": densenet121,
        "densenet161": densenet161,
        "densenet169": densenet169,
        "densenet201": densenet201,
    }
    if model_type in resnet_cfgs:
        block, layers = resnet_cfgs[model_type]
        model = ResNet(block, layers,
                       grid_size,
                       "regression",
                       feat_nums,
                       e_sizes,
                       num_classes=train_props.shape[1])
    elif model_type in densenet_ctors:
        model = densenet_ctors[model_type](grid_size,
                                           "regression",
                                           feat_nums,
                                           e_sizes,
                                           num_classes=train_props.shape[1])
    else:
        print("specify a valid model")
        return
    model.float()
    model.cuda()
    loss_function_train = nn.MSELoss(reduction='none')
    loss_function_val = nn.L1Loss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
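    # MSE drives the gradient updates; MAE is tracked alongside because the
    # paper results compared against below are reported as (unnormalized)
    # MAE (see the "unnormalized losses" logging further down).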

    # if model_type[0] == "r":
    # 	batch_size = 128
    # 	optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
    # 					   momentum=0.9, weight_decay=5e-4, nesterov=True)
    # elif model_type[0] == "d":
    # 	batch_size = 512
    # 	optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
    # 					   momentum=0.9, weight_decay=1e-4, nesterov=True)
    # else:
    # 	print("specify a valid model")
    # 	return

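    # Row 1 of means_stds holds the per-label stds (row 0 presumably the
    # means); the stds are used below to unnormalize the losses.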
    stds = means_stds[1, :]

    log_file = open(fn + ".txt", "w")
    log_file.write("start\n")
    log_file.flush()

    for file_num in range(num_loads):
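        # Checkpoint the full model (via pickle) every 20 loads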
        if file_num % 20 == 0:
            model_file = open("../../scratch/" + fn + ".pkl", "wb")
            pickle.dump(model, model_file)
            model_file.close()

        log_file.write("load: " + str(file_num))
        print("load: " + str(file_num))
        # Get new random walks
        if file_num == 0:
            t = time.time()
            train_loader, val_loader, test_loader = get_loaders(
                working_dir, file_num, grid_size, batch_size, train_props,
                val_props=val_props, test_props=test_props)
            print("load time")
            print(time.time() - t)
        else:
            # Pick a random precomputed walk file for this load; a separate
            # variable keeps the loop counter (used for checkpointing) intact
            walk_file_num = random.randint(0, num_walks - 1)
            t = time.time()
            train_loader, _, _ = get_loaders(
                working_dir, walk_file_num, grid_size, batch_size,
                train_props)
            print("load time")
            print(time.time() - t)
        # Train on set of random walks, can do multiple epochs if desired
        for epoch in range(epochs_per_load):
            model.train()
            t = time.time()
            train_loss_list = []
            train_mae_loss_list = []
            for i, (walks_int, walks_float, props) in enumerate(train_loader):
                walks_int = walks_int.cuda()
                walks_int = walks_int.long()
                walks_float = walks_float.cuda()
                walks_float = walks_float.float()
                props = props.cuda()
                outputs = model(walks_int, walks_float)
                # Individual losses for each item
                loss_mae = torch.mean(loss_function_val(props, outputs), 0)
                train_mae_loss_list.append(loss_mae.cpu().detach().numpy())
                loss = torch.mean(loss_function_train(props, outputs), 0)
                train_loss_list.append(loss.cpu().detach().numpy())
                # Loss converted to single value for backpropagation
                loss = torch.sum(loss)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            model.eval()
            val_loss_list = []
            with torch.no_grad():
                for i, (walks_int, walks_float,
                        props) in enumerate(val_loader):
                    walks_int = walks_int.cuda()
                    walks_int = walks_int.long()
                    walks_float = walks_float.cuda()
                    walks_float = walks_float.float()
                    props = props.cuda()
                    outputs = model(walks_int, walks_float)
                    # Individual losses for each item
                    loss = loss_function_val(props, outputs)
                    val_loss_list.append(loss.cpu().detach().numpy())
            # ith row of this array is the losses for each label in batch i
            train_loss_arr = np.array(train_loss_list)
            train_mae_arr = np.array(train_mae_loss_list)
            log_file.write("training mse loss\n")
            log_file.write(str(np.mean(train_loss_arr)) + "\n")
            log_file.write("training mae loss\n")
            log_file.write(str(np.mean(train_mae_arr)) + "\n")
            print("training mse loss")
            print(str(np.mean(train_loss_arr)))
            print("training mae loss")
            print(str(np.mean(train_mae_arr)))
            val_loss_arr = np.concatenate(val_loss_list, 0)
            val_loss = np.mean(val_loss_arr, 0)
            log_file.write("val loss\n")
            log_file.write(str(np.mean(val_loss_arr)) + "\n")
            print("val loss")
            print(str(np.mean(val_loss_arr)))
            # Unnormalized loss is for comparison to papers
            tnl = np.mean(train_mae_arr, 0)
            log_file.write("train normalized losses\n")
            log_file.write(" ".join(list(map(str, tnl))) + "\n")
            print("train normalized losses")
            print(" ".join(list(map(str, tnl))))
            log_file.write("val normalized losses\n")
            log_file.write(" ".join(list(map(str, val_loss))) + "\n")
            print("val normalized losses")
            print(" ".join(list(map(str, val_loss))))
            tunl = stds * tnl
            log_file.write("train unnormalized losses\n")
            log_file.write(" ".join(list(map(str, tunl))) + "\n")
            print("train unnormalized losses")
            print(" ".join(list(map(str, tunl))))
            vunl = stds * val_loss
            log_file.write("val unnormalized losses\n")
            log_file.write(" ".join(list(map(str, vunl))) + "\n")
            log_file.write("\n")
            print("val unnormalized losses")
            print(" ".join(list(map(str, vunl))))
            print("\n")
            print("time")
            print(time.time() - t)
        log_file.flush()
    log_file.close()
    return model
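
# Example invocation (hypothetical paths and hyperparameters; the helpers
# and globals above are assumed to come from the surrounding project):
# model = train("data_dir", grid_size=10, learning_rate=1e-3, batch_size=64,
#               num_walks=50, model_type="resnet18", fn="qm8_resnet18_run")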


Code Example #2

# Additional imports needed by this classification example; get_feat_dict,
# masked_cross_entropy, and save_plot are further project-level helpers.
import os
import statistics

import psutil
from sklearn.metrics import roc_auc_score


def train(working_dir, grid_size, learning_rate, batch_size, num_cores):
    process = psutil.Process(os.getpid())
    print(process.memory_info().rss / 1024 / 1024 / 1024)
    train_feat_dict = get_feat_dict(working_dir + "/train_smiles.csv")
    val_feat_dict = get_feat_dict(working_dir + "/val_smiles.csv")
    test_feat_dict = get_feat_dict(working_dir + "/test_smiles.csv")
    # The feature dictionaries take about 0.08 GB
    process = psutil.Process(os.getpid())
    print("pre model")
    print(process.memory_info().rss / 1024 / 1024 / 1024)

    torch.set_default_dtype(torch.float64)
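    # Note: set_default_dtype only affects tensors created afterwards
    # (including the model's parameters), though the model is explicitly
    # cast back to float32 below.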
    train_props, val_props, test_props = get_props(working_dir, dtype=int)
    print("pre model post props")
    print(process.memory_info().rss / 1024 / 1024 / 1024)
    model = ResNet(BasicBlock, [2, 2, 2, 2],
                   grid_size,
                   "classification",
                   feat_nums,
                   e_sizes,
                   num_classes=train_props.shape[1])
    model.float()
    model.cuda()
    print("model params")
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print(pytorch_total_params)
    model.cpu()
    print("model")
    print(process.memory_info().rss / 1024 / 1024 / 1024)
    loss_function = masked_cross_entropy
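    # masked_cross_entropy is project-defined; judging by the masks below it
    # presumably ignores entries labeled 2 ("missing"). A rough sketch of
    # that idea (hypothetical, not the project's actual implementation,
    # with F = torch.nn.functional):
    #   mask = (props != 2).float()
    #   per_elem = F.binary_cross_entropy_with_logits(
    #       outputs, props.clamp(max=1).float(), reduction='none')
    #   return (per_elem * mask).sum() / mask.sum()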
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    tl_list = []
    vl_list = []
    tmra_list = []
    vmra_list = []

    for file_num in range(num_loads):
        # Get new random walks
        if file_num == 0:
            print("before get_loaders")
            process = psutil.Process(os.getpid())
            print(process.memory_info().rss / 1024 / 1024 / 1024)
            train_loader, val_loader, test_loader = get_loaders(
                num_cores, working_dir, file_num, grid_size, batch_size,
                train_props, train_feat_dict,
                val_props=val_props, val_feat_dict=val_feat_dict,
                test_props=test_props, test_feat_dict=test_feat_dict)
        else:
            print("before get_loaders 2")
            process = psutil.Process(os.getpid())
            print(process.memory_info().rss / 1024 / 1024 / 1024)
            train_loader, _, _ = get_loaders(
                num_cores, working_dir, file_num, grid_size, batch_size,
                train_props, train_feat_dict)
        # Train on a single set of random walks, can do multiple epochs if desired
        for epoch in range(epochs_per_load):
            model.train()
            model.cuda()
            t = time.time()
            train_loss_list = []
            props_list = []
            outputs_list = []
            for i, (walks_int, walks_float, props) in enumerate(train_loader):
                walks_int = walks_int.cuda()
                walks_int = walks_int.long()
                walks_float = walks_float.cuda()
                walks_float = walks_float.float()
                props = props.cuda()
                props = props.long()
                props_list.append(props)
                outputs = model(walks_int, walks_float)
                outputs_list.append(outputs)
                loss = loss_function(props, outputs)
                train_loss_list.append(loss.item())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            props = torch.cat(props_list, 0)
            props = props.cpu().numpy()
            outputs = torch.cat(outputs_list, 0)
            outputs = outputs.detach().cpu().numpy()
            # Get train rocauc value
            train_rocaucs = []
            for i in range(props.shape[1]):
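                # A label value of 2 marks a missing entry (assumed
                # convention); those entries are excluded from ROC AUC.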
                mask = props[:, i] != 2
                train_rocauc = roc_auc_score(props[mask, i], outputs[mask, i])
                train_rocaucs.append(train_rocauc)
            model.eval()
            with torch.no_grad():
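                # Score the entire validation set in one forward pass;
                # assumes the validation tensors fit in GPU memory.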
                ds = val_loader.dataset
                walks_int = ds.int_feat_tensor
                walks_float = ds.float_feat_tensor
                props = ds.prop_tensor
                walks_int = walks_int.cuda()
                walks_int = walks_int.long()
                walks_float = walks_float.cuda()
                walks_float = walks_float.float()
                props = props.cuda()
                outputs = model(walks_int, walks_float)
                loss = loss_function(props, outputs)
                props = props.cpu().numpy()
                outputs = outputs.cpu().numpy()
                val_rocaucs = []
                for i in range(props.shape[1]):
                    mask = props[:, i] != 2
                    val_rocauc = roc_auc_score(props[mask, i],
                                               outputs[mask, i])
                    val_rocaucs.append(val_rocauc)
            print("load: " + str(file_num) + ", epochs: " + str(epoch))
            print("training loss")
            # Slightly approximate since last batch can be smaller...
            tl = statistics.mean(train_loss_list)
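            # An exact epoch mean would weight each batch loss by its batch
            # size, e.g. (sketch with hypothetical names):
            #   tl = sum(l * n for l, n in zip(losses, ns)) / sum(ns)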
            print(tl)
            print("val loss")
            vl = loss.item()
            print(vl)
            print("train mean roc auc")
            tmra = sum(train_rocaucs) / len(train_rocaucs)
            print(tmra)
            print("val mean roc auc")
            vmra = sum(val_rocaucs) / len(val_rocaucs)
            print(vmra)
            print("time")
            print(time.time() - t)
            tl_list.append(tl)
            vl_list.append(vl)
            tmra_list.append(tmra)
            vmra_list.append(vmra)
            model.cpu()
        del train_loader
    save_plot(tl_list, vl_list, tmra_list, vmra_list)
    return model
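
# Example invocation (hypothetical values; the helpers and globals above are
# assumed to come from the surrounding project):
# model = train("data_dir", grid_size=10, learning_rate=1e-3, batch_size=64,
#               num_cores=4)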