Example #1
    parser.add_argument("--batch_size", type=int, default=32,
                        help="size of batch")
    parser.add_argument("--ligand_receptor_pair_path", type=str, default="mouse_ligand_receptor_pair",
                        help="gene ligand receptor pair path")
    parser.add_argument("--pretrained_model_path", type=str, default="checkpoints_default.pth",
                        help="pretrained_model_path")
    parser.add_argument("--load_pretrained_model", type=int, default=0,
                        help="load_pretrained_model")                                   
    parser.add_argument("--save_model_path", type=str, default="checkpoints_default.pth",
                        help="save_model_path")
    parser.add_argument("--train_dataset", type=str, default="train_dataset",
                        help="train dataset")
    parser.add_argument("--test_dataset", type=str, default="test_dataset",
                        help="test dataset")
    parser.add_argument("--just_train", type=int, default=0,
                        help="nothing, for debug")
    parser.add_argument("--each_dataset_size", type=int, default=0,
                        help="0 represent all")
    parser.add_argument("--using_mmd", type=int, default=0,
                        help="if using mmd loss, 0 is not using")            
    params = parser.parse_args()
    print(params)
    
    set_seed(params.random_seed)
    # print(random.random())
    # print(np.random.random())
    # print(torch.rand(2))

    trainer = Trainer(params)
    trainer.train()
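Both entry points call set_seed, which is not shown in these snippets (nor is the --random_seed argument it reads, presumably defined earlier in the parser). A minimal sketch of such a helper, assuming it seeds Python's random, NumPy, and PyTorch on CPU and GPU:

import random

import numpy as np
import torch


def set_seed(seed: int) -> None:
    # Seed every RNG the training code touches so runs are reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)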
Example #2
def load_tissue(params=None):
    random_seed = params.random_seed
    dense_dim = params.dense_dim
    set_seed(random_seed)
    # dense_dim: 400 -> 0.7895, 200 -> 0.5117, 100 -> 0.3203, 50 -> 0.2083
    """
    root = '../data/mammary_gland'
    num = 2915
    data_path = f'{root}/mouse_Mammary_gland{num}_data.csv'
    type_path = f'{root}/mouse_Mammary_gland{num}_celltype.csv'
    """
    data_path = '../data/mouse_data/mouse_brain_2915_data.csv'
    type_path = '../data/mouse_data/mouse_brain_2915_celltype.csv'

    # load the cell-type file and build the label <-> id mappings
    cell2type = pd.read_csv(type_path, index_col=0)
    cell2type.columns = ['cell', 'type']

    id2label = cell2type['type'].drop_duplicates(keep='first').tolist()
    label2id = {label: idx for idx, label in enumerate(id2label)}
    print(f'{len(id2label)} classes in total')
        
    cell2type['id'] = cell2type['type'].map(label2id)
    assert not cell2type['id'].isnull().any(), 'unknown cell type found in the celltype file.'

    # load data file
    data = pd.read_csv(data_path, index_col=0)
    data = data.transpose(copy=True)
    assert cell2type['cell'].tolist() == data.index.tolist()
    print(f'{data.shape[0]} cells, {data.shape[1]} genes.')
    # genes
    id2gene = data.columns.tolist()
    gene2id = {gene: idx for idx, gene in enumerate(id2gene)}

    # construct graph and add nodes and edges
    graph = dgl.DGLGraph()
    start = time()
    # 1. add all genes as nodes
    num_genes = len(id2gene)
    graph.add_nodes(num_genes)
    # non-zero expression entries give (cell, gene) index pairs for the edges
    row_idx, col_idx = data.to_numpy().nonzero()
    # offset cell indices: gene nodes occupy ids [0, num_genes)
    row_idx = row_idx + num_genes
    # 2. add cell nodes and edges
    num_cells = data.shape[0]
    graph.add_nodes(num_cells)
    graph.add_edges(row_idx, col_idx)
    graph.add_edges(col_idx, row_idx)
    print(f'Added {num_cells} cell nodes and {2 * len(row_idx)} edges (both directions).')
    print(f'#Nodes: {graph.number_of_nodes()}, #Edges: {graph.number_of_edges()}.')
    print(data.head())

    # reduce sparse features to dense features
    cell_pca = PCA(n_components=dense_dim, random_state=random_seed)
    cell_pca.fit(data.values)
    cell_feat = cell_pca.transform(data.values)
    cell_feat = torch.FloatTensor(cell_feat)

    gene_pca = PCA(n_components=dense_dim, random_state=random_seed)
    gene_pca.fit(data.T.values)
    gene_feat = gene_pca.transform(data.T.values)
    gene_feat = torch.FloatTensor(gene_feat)

    feat = torch.cat([gene_feat, cell_feat], dim=0)
    # feat = torch.zeros(graph.number_of_nodes(), dense_dim).normal_()

    cell_evr = sum(cell_pca.explained_variance_ratio_) * 100
    gene_evr = sum(gene_pca.explained_variance_ratio_) * 100
    print(f'[PCA] Cell EVR: {cell_evr:.2f}%. Gene EVR: {gene_evr:.2f}%.')
    # generate labels for training and testing
    labels = torch.LongTensor(cell2type['id'].tolist())
    train_mask = torch.zeros(num_cells, dtype=torch.bool)
    # random 80/20 train/test split over cells
    train_randidx = torch.randperm(num_cells)[:int(num_cells * 0.8)]
    train_mask[train_randidx] = True
    test_mask = ~train_mask
    return num_cells, num_genes, graph, feat, labels, train_mask, test_mask
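For orientation, a hedged usage sketch of load_tissue: the Namespace fields mirror what the function reads, and the index arithmetic illustrates that gene nodes occupy ids 0..num_genes-1 while cell nodes follow them. The parameter values are hypothetical, not the repository's defaults.

from argparse import Namespace

import torch

# Hypothetical parameters; real values come from the repository's config.
params = Namespace(random_seed=42, dense_dim=400)
num_cells, num_genes, graph, feat, labels, train_mask, test_mask = load_tissue(params)

# Attach node features; cell node ids start at num_genes, so shift the
# cell-level masks when selecting node ids for training and evaluation.
graph.ndata['feat'] = feat
train_cell_nodes = torch.arange(num_cells)[train_mask] + num_genes
test_cell_nodes = torch.arange(num_cells)[test_mask] + num_genes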
Example #3
        model_name = type(model).__name__
        # TODO: path

        save_path = "..."
        # create the parent directory of the checkpoint file, then save
        os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)
        torch.save(model.state_dict(), save_path)
        print("saving model done!")
        return save_path


if __name__ == '__main__':
    cfg = cfg()
    args = cfg.get_args()
    cfg.update_train_configs(args)
    set_seed(cfg.random_seed)

    pprint.pprint(args)

    logger = initialize_exp(cfg)
    logger_path = get_dump_path(cfg)

    if not cfg.no_tensorboard:
        writer = SummaryWriter(
            log_dir=os.path.join(logger_path, 'tensorboard'))

    # print("print c to continue...")

    torch.cuda.set_device(cfg.gpu)
    runner = Runner(cfg)
    runner.run()
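The save_path = "..." placeholder and its # TODO: path are left as-is above. As one hypothetical way to finish that step, a standalone helper that derives the filename from the model class name; the checkpoints directory and .pth naming are assumptions, not the repository's convention.

import os

import torch


def save_checkpoint(model, save_dir="checkpoints"):
    # Hypothetical helper: build the file name from the model class name,
    # create the parent directory (not the file path itself), then save.
    model_name = type(model).__name__
    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, f"{model_name}.pth")
    torch.save(model.state_dict(), save_path)
    print(f"saved model to {save_path}")
    return save_path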