Example 1
def main(args):
    SEED = 17
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    random.seed(SEED)
    torch.cuda.manual_seed(SEED)

    net = nm.Net()
    path = './trained_model_best.pth'
    d = torch.load(path)
    net.load_state_dict(d['state_dict'])

    net.eval()

    pairs = dl.get_all_pairs(args.data)
    embeddings = {}
    for idx, p in enumerate(pairs):
        if idx % 10000 == 0:
            print(idx)
        with torch.no_grad():
            inp = torch.from_numpy(p[1]).view(1, -1)  # avoid shadowing the builtin `input`
            embedding = get_embedding(net, inp)
            embeddings[p[0]] = embedding.numpy()[0]

    df = pandas.DataFrame(data=embeddings)
    out = cmapPy.pandasGEXpress.GCToo.GCToo(df)
    write(out, 'embeddings')
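
The helper get_embedding is defined elsewhere in this project. A minimal sketch of what it plausibly does, assuming Net is an autoencoder whose encoder half is exposed as net.encode (an assumed method name, not confirmed by the source):

import torch

def get_embedding(net, x):
    # Hypothetical sketch: run only the encoder half of the
    # autoencoder and return the latent code. The project's real
    # helper may slice the network differently.
    with torch.no_grad():
        return net.encode(x)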
Example 2
def main(args):
    SEED = 17
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    random.seed(SEED)
    torch.cuda.manual_seed(SEED)

    net = nm.Net()
    path = './trained_model_best.pth'
    d = torch.load(path)
    net.load_state_dict(d['state_dict'])

    net.eval()
    net.cuda()

    train_loader, test_loader = dl.get_data(args.data)
    val_loss = 0.
    criterion = torch.nn.MSELoss()
    for batch_idx, batch in enumerate(test_loader):
        inputs = batch.cuda()  # Variable is deprecated; tensors move to the GPU directly
        with torch.no_grad():
            output = net(inputs)
            loss = criterion(output, inputs)
        val_loss += loss.item() * len(inputs)
    val_loss = val_loss / len(test_loader.dataset)
    print(val_loss)
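
The running loss is scaled by the batch size and divided by len(test_loader.dataset) because MSELoss defaults to reduction='mean'. A small self-contained check of that identity:

import torch

criterion = torch.nn.MSELoss()  # reduction='mean' by default
a, b = torch.randn(10, 4), torch.randn(10, 4)
full = criterion(a, b)
# Per-batch means weighted by batch size recover the dataset-level
# mean, even when the final batch is smaller than the rest.
parts = criterion(a[:7], b[:7]) * 7 + criterion(a[7:], b[7:]) * 3
assert torch.allclose(full, parts / 10)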
Example 3
def train_net(data, labels):

    # Use the following to instantiate a network
    net = network.Net()

    # Use double precision
    net.double()

    # Put the network on the GPU
    net.cuda()
    # Continue training from a checkpoint by uncommenting:
    #d = torch.load('trained_cnn_model.pth')
    #net.load_state_dict(d['state_dict'])

    # Select your optimization method
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),
                           lr=1e-4)

    # Uncomment this to use a custom initialization
    """
    bound = 5e-2
    for idx, param in enumerate(net.parameters()):
        if idx == 0:
            param.data.fill_(0)
        else:
            init = torch.Tensor(param.size()).uniform_(-bound, bound)
            param.data = init
    #"""

    num_epochs = 1000000

    # Place your data on the GPU (Variable is deprecated; use tensors directly)
    inputs = torch.stack(data).double().cuda()
    targets = torch.stack(labels).double().cuda()

    best_loss = float('inf')  # np.float was removed in NumPy 1.24

    for i in range(num_epochs):
        # Take 1 step of GD
        train_loss = train_step(net, inputs, targets, optimizer, iteration=i)

        if i % 100 == 0:
            print(i, train_loss, best_loss)
            vis_output(net, inputs)

        # Save the best model if loss is low enough
        if train_loss < best_loss and train_loss < 1e-2:
            best_loss = train_loss
            d = {}
            d['state_dict'] = net.state_dict()
            torch.save(d, 'trained_cnn_model.pth')
            if train_loss < 1e-8:
                break
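
train_step is defined elsewhere. A minimal sketch consistent with how it is called here, assuming one full-batch gradient step under an MSE objective (the real implementation may log, clip, or schedule differently; the variant in Example 5 also returns a learning rate):

import torch
import torch.nn.functional as F

def train_step(net, inputs, targets, optimizer, iteration=0):
    # Hypothetical sketch: one optimizer step on the full batch.
    optimizer.zero_grad()
    loss = F.mse_loss(net(inputs), targets)
    loss.backward()
    optimizer.step()
    return loss.item()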
Example 4
def train_network(train_loader, test_loader):

    # Uncomment below to resume training if needed
    #d = torch.load('trained_model_best.pth')
    net = neural_model.Net()
    #net.load_state_dict(d['state_dict'])

    # Print the number of parameters in the model
    params = sum(p.numel() for p in net.parameters())
    print("NUMBER OF PARAMS: ", params)

    # Custom Initialization if needed
    """
    bound = 1e-10
    for idx, param in enumerate(net.parameters()):
        if idx == len(list(net.parameters())) - 1:
            print(param.size())
            param.data.fill_(0)
        else:
            init = torch.Tensor(param.size()).uniform_(-bound, bound)
            param.data = init
    #"""

    # Adam optimization (but you can try SGD as well)
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
    #optimizer = torch.optim.SGD(net.parameters(), lr=1e-1)

    net.cuda()
    num_epochs = 100000
    best_loss = float("inf")  # np.float was removed in NumPy 1.24

    for i in range(num_epochs):

        print("Epoch: ", i)
        train_loss = train_step(net, optimizer, train_loader)
        print("Train Loss: ", train_loss)
        test_loss = val_step(net, test_loader)
        print("Test Loss: ", test_loss)

        if train_loss < 1e-15:
            break
        if test_loss < best_loss:
            best_loss = test_loss
            net.cpu()
            d = {}
            d['state_dict'] = net.state_dict()
            torch.save(d, 'trained_model_best.pth')
            net.cuda()
        print("Best Test Loss: ", best_loss)
Example 5
def train_net(data, labels):
    global hist_1, hist_2

    dim = data.size()[-1]
    # Use the following to instantiate a network
    net = network.Net(dim=dim)
    for idx, p in enumerate(net.parameters()):
        if idx == 0:
            hist_1 = np.zeros(p.size())
        else:
            hist_2 = np.zeros(p.size())
    #"""
    bound = 1e-1
    for idx, param in enumerate(net.parameters()):
        init = torch.Tensor(param.size()).uniform_(-bound, bound)
        param.data = init

    #"""
    net.double()
    net.cuda()
    optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, net.parameters()),
                              lr=1e-1)



    num_epochs = 10000  # reduced from 1000000
    # Place your data on the GPU (Variable is deprecated; use tensors directly)
    inputs = data.double().cuda()   # use .float() instead for single precision
    targets = labels.double().cuda()

    best_loss = float('inf')  # np.float was removed in NumPy 1.24
    losses = []
    for i in range(num_epochs):

        train_loss, lr = train_step(net, inputs, targets, optimizer, iteration=i)
        losses.append(train_loss)
        if i % 100 == 0:
            print(i, train_loss, best_loss, "Learning Rate: ", lr)

        # Save the best model if loss is low enough
        if train_loss < best_loss and train_loss < 1e-5:
            best_loss = train_loss
            d = {}
            d['state_dict'] = net.state_dict()
            torch.save(d, 'trained_cnn_model.pth')
    pickle.dump(losses, open('train_loss.p', 'wb'))
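
As an illustration (not part of the original source), the pickled loss curve can be reloaded and plotted on a log scale:

import pickle
import matplotlib.pyplot as plt

# Reload the per-iteration losses written by train_net above.
losses = pickle.load(open('train_loss.p', 'rb'))
plt.semilogy(losses)
plt.xlabel('iteration')
plt.ylabel('train loss')
plt.savefig('train_loss.png')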
Example 6
def main(options):
    seed = options.seed
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed(seed)

    net = network.Net()
    d = torch.load('trained_cnn_model.pth')
    net.load_state_dict(d['state_dict'])

    net.double()
    net.cuda()

    train_frames, _ = dataset.make_dataset()
    frames, _ = dataset.make_test_dataset()
    count = 0

    #"""
    for f in train_frames:
        f = deepcopy(f.numpy())
        J = fast_jacobian(net, f, 3 * SIZE * SIZE)
        J = J.view(-1, 3 * SIZE * SIZE)
        J = J.cpu().data.numpy()
        s, _ = eigs(J, k=1, tol=1e-3)  # largest-magnitude eigenvalue
        top = np.abs(s[0])             # index into the length-1 array
        print(top)
        if top < 1:
            count += 1
        del J
    print("Attractors: ", count)
    #"""

    #"""
    avg_error = 0
    for f in frames:
        f = deepcopy(f.numpy())
        count += 1
        error = iterate(net, f)
        #print(error)
        avg_error += error
    print(num_frames)
    print("AVERAGE ERROR: ", avg_error / num_frames)
Example 7
def main(options):
    seed = options.seed
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed(seed)
    dim = options.dim
    n = options.num_samples

    net = nn.Net(dim=dim)
    d = torch.load('trained_cnn_model.pth')
    net.load_state_dict(d['state_dict'])
    net.double()

    # Plot one histogram over all of the network's weights
    all_params = np.concatenate([param.data.numpy().reshape(-1)
                                 for param in net.parameters()])
    plt.hist(all_params, bins=100)
    plt.savefig('plots/regression_histogram.pdf')

    w = dataset.get_hyperplane(dim)
    x, y = dataset.sample_points(w, n=n)
    x = torch.from_numpy(x.transpose())
Example 8
def main(args):
    SEED = 1717
    np.random.seed(SEED)
    random.seed(SEED)
    net = nm.Net()
    path = './trained_model_best.pth'
    d = torch.load(path)
    net.load_state_dict(d['state_dict'])
    net.double()
    net.eval()
    net.cuda()

    pairs = dl.get_all_pairs(args.data)
    embeddings = []
    cell_types = {}
    idx_to_cell = []
    cell_1 = 'A549'
    cell_2 = 'MCF7'
    fda_approved = set()

    with open('fda_approved.txt', 'r') as f:
        for line in f:
            fda_approved.add(line.strip())

    pert_cell_map = p.load(open('pert_cell_map.p', 'rb'))
    pert_dose_map = p.load(open('pert_dose_map.p', 'rb'))
    embedding_map = {}
    # Annoyingly pert id is actually pert name in the dataframe
    for i in range(len(pairs)):
        cell_type = pert_cell_map[pairs[i][0]]

        pert_id = pert_dose_map[pairs[i][0]][2]
        pert_type = pert_dose_map[pairs[i][0]][3]
        rna_plate = pert_dose_map[pairs[i][0]][4]
        rna_well = pert_dose_map[pairs[i][0]][5]
        if pert_dose_map[pairs[i][0]][0] < 0:
            continue
        if cell_type != cell_1 and cell_type != cell_2:
            continue
        if pert_id not in fda_approved and pert_id != 'DMSO':
            continue
        if pert_id != 'DMSO' and pert_id != 'vorinostat':
            continue

        cell_type = cell_type + "_" + pert_id
        #if cell_type in cell_types and len(cell_types[cell_type]) > 50:
        #    continue

        idx_to_cell.append(cell_type)
        if cell_type in cell_types:
            cell_types[cell_type].append(i)
        else:
            cell_types[cell_type] = [i]
        inp = torch.from_numpy(pairs[i][1]).double().cuda()  # avoid shadowing the builtin `input`
        inp = inp.view(1, -1)
        # Uncomment to use original embedding
        #embedding = pairs[i][1].reshape(1, -1)

        # Recomment when using original embedding
        embedding = get_embedding(net, inp)
        embedding = embedding.cpu().data.numpy()

        embedding_map[i] = embedding
        embeddings.append(embedding)

    print(sorted(cell_types.keys()))
    print(len(cell_types))
    embeddings = np.concatenate(embeddings, axis=0)
    print(embeddings.shape)

    means = {}
    for key in cell_types:
        points = np.array([embedding_map[i] for i in cell_types[key]])
        means[key] = np.mean(points, axis=0).reshape(-1)

    print("CORRELATIONS:")
    print(get_correlation(means[cell_1 + '_vorinostat'] \
                          - means[cell_1 + '_DMSO'],
                          means[cell_2 + '_vorinostat'] \
                          - means[cell_2 + '_DMSO']))


    reducer = umap.UMAP()
    embedding = reducer.fit_transform(embeddings)
    print("Shape after transform: ", embedding.shape)  # the 2-D UMAP output
    cell_keys = sorted(cell_types.keys())
    color_map = {cell_keys[i]: i for i in range(len(cell_keys))}
    color_lvl = 8

    rgb = np.array(list(permutations(range(0, 256, color_lvl), 3))) / 255
    colors = sample(list(rgb), len(cell_keys))
    group_by_color = {}

    for idx, e in enumerate(embedding):
        cell_type = idx_to_cell[idx]
        if cell_type in group_by_color:
            group_by_color[cell_type].append(e)
        else:
            group_by_color[cell_type] = [e]

    for key in group_by_color:
        points = np.array(group_by_color[key])
        cell_type = key
        plt.plot(points[:, 0], points[:, 1], 'o',
                 color=colors[color_map[cell_type]],
                 label=cell_type,
                 alpha=.5)

    plt.legend(bbox_to_anchor=(1.05, 1), ncol=10)
    plt.savefig('tmp.png', bbox_inches='tight')
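
get_correlation is not shown either; a minimal sketch, assuming it computes the Pearson correlation between the two flattened difference-of-means vectors:

import numpy as np

def get_correlation(u, v):
    # Hypothetical sketch: Pearson correlation via numpy's corrcoef.
    return np.corrcoef(u.reshape(-1), v.reshape(-1))[0, 1]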