Example #1
def test_strata(valexamples, model):
    with torch.no_grad():
        model.eval()
        results = []
        labels = []
        labelvec = torch.zeros(batch_size, dtype=torch.float32, device='cuda')
        for pos in range(0, len(valexamples), batch_size):
            batch = valexamples[pos:pos + batch_size]
            if len(batch) < batch_size:  # wrap the last batch
                batch += valexamples[:batch_size - len(batch)]
            batch = molgrid.ExampleVec(batch)
            batch.extract_label(0, labelvec)  # extract the first label (there is only one in this case)

            gmaker.forward(batch, input_tensor, 2, random_rotation=True)  # create grid; randomly translate/rotate the molecule
            output = model(input_tensor)
            results.append(output.detach().cpu().numpy())
            labels.append(labelvec.detach().cpu().numpy())

        results = np.array(results).flatten()
        labels = np.array(labels).flatten()
        valrmse = np.sqrt(np.mean((results - labels)**2))
        if np.isinf(valrmse):
            valrmse = 1000  # cap divergent values so logging stays finite
        valame = np.mean(np.abs(results - labels))
        print("Validation", valrmse, valame)
        wandb.log({'valrmse': valrmse, 'valame': valame})
        wandb.log({'valpred': results, 'valtrue': labels})
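These snippets rely on module-level state (`batch_size`, `gmaker`, `input_tensor`, `labels`, plus the usual imports) defined outside the functions shown. A minimal setup sketch following the standard molgrid tutorial pattern; the typer choice and batch size here are assumptions, not from the original code:

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import molgrid
import wandb

batch_size = 32  # assumed value
gmaker = molgrid.GridMaker()
dims = gmaker.grid_dimensions(molgrid.defaultGninaLigandTyper.num_types())  # assumed typer
tensor_shape = (batch_size,) + dims
input_tensor = torch.zeros(tensor_shape, dtype=torch.float32, device='cuda')
labels = torch.zeros(batch_size, dtype=torch.float32, device='cuda')  # used by train_strata below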
Example #2
def train_strata(strata, model, optimizer, losses, maxepoch, stop=20000, initloss=1000):
    bestindex = len(losses)  # position in the loss history
    bestloss = 100000
    for _ in range(maxepoch):  # do at most maxepoch epochs, but should bail out earlier
        np.random.shuffle(strata)
        for pos in range(0, len(strata), batch_size):
            batch = strata[pos:pos + batch_size]
            if len(batch) < batch_size:  # wrap the last batch
                batch += strata[:batch_size - len(batch)]
            batch = molgrid.ExampleVec(batch)
            batch.extract_label(0, labels)  # extract the first label (there is only one in this case)

            gmaker.forward(batch, input_tensor, 2, random_rotation=True)  # create grid; randomly translate/rotate the molecule
            output = model(input_tensor)  # run model
            loss = F.smooth_l1_loss(output, labels)  # THIS PART DIFFERENT
            optimizer.zero_grad()  # gradients would otherwise accumulate across steps
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 10)

            optimizer.step()
            losses.append(float(loss))
            trailing = np.mean(losses[-TRAIL:])
            
            if trailing < bestloss:
                bestloss = trailing
                bestindex = len(losses)
            
            wandb.log({
                'loss': float(loss),
                'trailing': trailing,
                'bestloss': bestloss,
                'stratasize': len(strata),
                'lr': optimizer.param_groups[0]['lr']
            })
            
            if len(losses) - bestindex > stop and bestloss < initloss:
                return bestloss  # "converged"
    return bestloss
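`strata` is a plain Python list of `molgrid.Example` objects, so a curriculum can be run by calling `train_strata` once per stratum and threading the returned loss back in through `initloss`. A sketch of such a driver; the `strata_list` variable and the schedule are assumptions, not from the original code:

losses = []
bestloss = 1000  # matches the initloss default above
for stratum in strata_list:  # strata_list: assumed list of Example lists, ordered easy to hard
    bestloss = train_strata(stratum, model, optimizer, losses,
                            maxepoch=50, stop=20000, initloss=bestloss)
    test_strata(valexamples, model)  # validate after each stratum (see Example #1)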
Example #3
def train_strata(strata, model, optimizer, losses, maxepoch, stop=20000):
    bestloss = 100000  # best trailing-average loss seen so far in this stratum
    bestindex = len(losses)  # position in the loss history
    for epoch in range(maxepoch):  # do at most maxepoch epochs, but should bail out earlier
        np.random.shuffle(strata)
        for pos in range(0, len(strata), batch_size):
            batch = strata[pos:pos + batch_size]
            if len(batch) < batch_size:  # wrap the last batch
                batch += strata[:batch_size - len(batch)]
            batch = molgrid.ExampleVec(batch)
            batch.extract_label(0, labels)  # extract the first label (there is only one in this case)

            gmaker.forward(batch, input_tensor, 2, random_rotation=True)  # create grid; randomly translate/rotate the molecule
            output = model(input_tensor)  #run model
            loss = F.smooth_l1_loss(output.flatten(), labels.flatten())
            optimizer.zero_grad()  # gradients would otherwise accumulate across steps
            loss.backward()

            if args.clip > 0:
                nn.utils.clip_grad_norm_(model.parameters(), args.clip)

            optimizer.step()
            losses.append(float(loss))
            trailing = np.mean(losses[-TRAIL:])

            if trailing < bestloss:
                bestloss = trailing
                bestindex = len(losses)
                torch.save(
                    model.state_dict(),
                    os.path.join(wandb.run.dir,
                                 'model_better_%d_%d_%f.pt' % (epoch, pos, bestloss)))
            if (pos % 100) == 0:
                wandb.log({
                    'loss': float(loss),
                    'trailing': trailing,
                    'bestloss': bestloss,
                    'stratasize': len(strata),
                    'lr': optimizer.param_groups[0]['lr']
                })

            if len(losses) - bestindex > stop:
                return True  # "converged"
    return False
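This variant differs from Example #2 in three ways: gradient clipping is gated by an `args.clip` flag, the best model so far is checkpointed into `wandb.run.dir`, and logging happens only every 100 positions. The globals it assumes could look like the following sketch; the `TRAIL` value and the flag default are assumptions:

import argparse
import os

TRAIL = 100  # assumed window for the trailing loss average
parser = argparse.ArgumentParser()
parser.add_argument('--clip', type=float, default=10.0,
                    help='max gradient norm; <= 0 disables clipping')
args = parser.parse_args()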
Example #4
def test_examplevec():
    m = pybel.readstring('smi', 'c1ccccc1CO')
    m.addh()
    m.make3D()

    c = molgrid.CoordinateSet(m, molgrid.ElementIndexTyper())
    c2 = molgrid.CoordinateSet(m)

    c2.make_vector_types()  # this should not screw up index types

    ex = molgrid.Example()
    ex.coord_sets.append(c)
    ex.labels.append(0)

    ex2 = molgrid.Example()
    ex2.coord_sets.append(c2)
    ex2.labels.append(1)

    evec = molgrid.ExampleVec([ex, ex2])  # construction itself should not raise
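A small follow-on usage sketch (assumes the `evec` built above): the labels attached to the examples can be pulled back out in batch with `extract_label`, the same call the training snippets use; the CPU tensor here is an assumption, as the other examples use CUDA tensors.

import torch

labels = torch.zeros(2, dtype=torch.float32)
evec.extract_label(0, labels)  # first label of each example: tensor([0., 1.])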
Example #5
        typer = molgrid.PythonCallbackIndexTyper(mytyper, 4,
                                                 ['H', 'C', 'N', 'O'])

        tensor_shape = (1, ) + dims
        input_tensor = torch.zeros(tensor_shape,
                                   dtype=torch.float32,
                                   device=device)

        predictions = []
        with torch.no_grad():
            labelvec = torch.zeros(1, dtype=torch.float32, device=device)

            c = molgrid.CoordinateSet(mol, typer)
            ex = molgrid.Example()
            ex.coord_sets.append(c)
            batch = molgrid.ExampleVec([ex])
            types = c.type_index.tonumpy()
            tcnts = np.array([np.count_nonzero(types == i) for i in range(4)])
            base = linmodel.predict([tcnts])

            start = time.time()
            for _ in range(args.n):
                gmaker.forward(batch, input_tensor, random_translation=2,
                               random_rotation=True)  # create grid; randomly translate/rotate the molecule
                output = model(input_tensor).cpu().numpy()
                pred = base[0] + output[0][0]
                #print(pred)
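The fragment ends inside the sampling loop, with `predictions` still empty. Because every pass regrids the molecule with a fresh random rotation and translation, a natural way to finish is to collect each pose's prediction and average them as test-time augmentation. A hypothetical continuation under that assumption (not from the original source):

                predictions.append(pred)  # hypothetical: collect this pose's prediction
            elapsed = time.time() - start
            # average over args.n random poses; a simple test-time augmentation
            print('mean prediction %.3f over %d poses (%.2fs)'
                  % (np.mean(predictions), args.n, elapsed))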