def build_standard_net(layer_sizes, normalize, L2_reg, L1_reg=0.0, activation_function=relu,
                       nll_func=mean_squared_error):
    """Just a plain old neural net, nothing to do with molecules.
    layer_sizes includes the input size; a single output unit is appended."""
    layer_sizes = layer_sizes + [1]

    parser = WeightsParser()
    for i, shape in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        parser.add_weights(('weights', i), shape)
        parser.add_weights(('biases', i), (1, shape[1]))

    def predictions(W_vect, X):
        # X holds one fingerprint per molecule, e.g. shape (100, 50) for a
        # minibatch of 100 molecules with layer_sizes = [50, 100, 1].
        cur_units = X
        for layer in range(len(layer_sizes) - 1):
            cur_W = parser.get(W_vect, ('weights', layer))
            cur_B = parser.get(W_vect, ('biases', layer))
            cur_units = np.dot(cur_units, cur_W) + cur_B
            if layer < len(layer_sizes) - 2:   # No nonlinearity on the output layer.
                if normalize:
                    cur_units = batch_normalize(cur_units)
                cur_units = activation_function(cur_units)
        return cur_units[:, 0]   # Squeeze the (N, 1) output to a length-N vector.

    def loss(w, X, targets):
        assert len(w) > 0
        # Gaussian (L2) and Laplace (L1) log-priors on the weights act as
        # regularizers; subtracting the log-prior from the negative
        # log-likelihood gives the (unnormalized) negative log-posterior.
        log_prior = -L2_reg * np.dot(w, w) / len(w) - L1_reg * np.mean(np.abs(w))
        preds = predictions(w, X)
        return nll_func(preds, targets) - log_prior

    return loss, predictions, parser
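# A minimal usage sketch (not part of the original source): builds a
# 10 -> 16 -> 1 regression net on random data. Assumes the module-level
# imports used elsewhere in this file (autograd's np and npr).
def _demo_standard_net():
    loss_fun, pred_fun, parser = build_standard_net(
        layer_sizes=[10, 16], normalize=False, L2_reg=0.01)
    weights = 0.1 * npr.RandomState(0).randn(len(parser))
    X = npr.RandomState(1).randn(100, 10)        # 100 examples, 10 features each.
    targets = npr.RandomState(2).randn(100)
    print(pred_fun(weights, X).shape)            # -> (100,)
    print(loss_fun(weights, X, targets))         # scalar training objective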
def build_fingerprint_deep_net(net_params, fingerprint_func, fp_parser, fp_l2_penalty):
    """Composes a fingerprint function with signature (weights, smiles)
    with a fully-connected neural network."""
    net_loss_fun, net_pred_fun, net_parser = build_standard_net(**net_params)

    combined_parser = WeightsParser()
    combined_parser.add_weights('fingerprint weights', (len(fp_parser),))
    combined_parser.add_weights('net weights', (len(net_parser),))

    def unpack_weights(weights):
        fingerprint_weights = combined_parser.get(weights, 'fingerprint weights')
        net_weights         = combined_parser.get(weights, 'net weights')
        return fingerprint_weights, net_weights

    def loss_fun(weights, smiles, targets):
        fingerprint_weights, net_weights = unpack_weights(weights)
        # fingerprint_func (typically the output_layer_fun built by
        # build_convnet_fingerprint_fun) maps a minibatch of SMILES strings
        # to fixed-size fingerprint vectors, one row per molecule.
        fingerprints = fingerprint_func(fingerprint_weights, smiles)
        net_loss = net_loss_fun(net_weights, fingerprints, targets)
        # The extra L2 penalty applies only to learned (neural) fingerprints;
        # fixed fingerprints such as Morgan have no weights to penalize.
        if len(fingerprint_weights) > 0 and fp_l2_penalty > 0:
            return net_loss + fp_l2_penalty * np.mean(fingerprint_weights**2)
        else:
            return net_loss

    def pred_fun(weights, smiles):
        fingerprint_weights, net_weights = unpack_weights(weights)
        fingerprints = fingerprint_func(fingerprint_weights, smiles)
        return net_pred_fun(net_weights, fingerprints)

    return loss_fun, pred_fun, combined_parser
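# A minimal composition sketch (not part of the original source): wires the
# convnet fingerprint defined later in this file into the net above. Any
# callable fingerprint_func(weights, smiles) returning an (N, fp_length)
# array works in its place.
def _demo_fingerprint_deep_net(smiles_list):
    fp_fun, fp_parser = build_convnet_fingerprint_fun(
        num_hidden_features=[20, 20], fp_length=50)
    net_params = dict(layer_sizes=[50, 100], normalize=True, L2_reg=0.01)
    loss_fun, pred_fun, parser = build_fingerprint_deep_net(
        net_params, fp_fun, fp_parser, fp_l2_penalty=0.001)
    weights = 0.1 * npr.RandomState(0).randn(len(parser))
    return pred_fun(weights, smiles_list)   # one prediction per SMILES string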
def build_mean_predictor(loss_func):
    parser = WeightsParser()
    parser.add_weights('mean', (1,))
    def loss_fun(weights, smiles, targets):
        mean = parser.get(weights, 'mean')
        return loss_func(np.full(targets.shape, mean), targets)
    def pred_fun(weights, smiles):
        mean = parser.get(weights, 'mean')
        return np.full((len(smiles),), mean)
    return loss_fun, pred_fun, parser
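# Baseline sketch (not part of the original source): the mean predictor
# learns a single scalar, so it serves as a sanity-check floor for any
# real model's loss.
def _demo_mean_predictor(smiles_list, targets):
    loss_fun, pred_fun, parser = build_mean_predictor(mean_squared_error)
    weights = np.array([np.mean(targets)])   # optimal weight under squared error
    print(loss_fun(weights, smiles_list, targets))
    print(pred_fun(weights, smiles_list))    # constant prediction per molecule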
def build_double_convnet_fingerprint_fun(**kwargs):

    fp_fun1, parser1 = build_convnet_fingerprint_fun(**kwargs)
    fp_fun2, parser2 = build_convnet_fingerprint_fun(**kwargs)

    combined_parser = WeightsParser()
    combined_parser.add_weights('weights1', (len(parser1),))
    combined_parser.add_weights('weights2', (len(parser2),))

    def double_fingerprint_fun(weights, smiles_tuple):
        smiles1, smiles2 = zip(*smiles_tuple)
        weights1 = combined_parser.get(weights, 'weights1')
        weights2 = combined_parser.get(weights, 'weights2')
        fp1 = fp_fun1(weights1, smiles1)
        fp2 = fp_fun2(weights2, smiles2)
        # Concatenate the two fingerprints of each pair, matching the
        # fixed-weight variant below.
        return np.concatenate([fp1, fp2], axis=1)

    return double_fingerprint_fun, combined_parser
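# Pairing sketch (not part of the original source): each training example is
# a pair of SMILES strings, and the result is one concatenated fingerprint
# row per pair (fp_length columns from each side).
def _demo_double_convnet_fingerprint(smiles_pairs):
    double_fp_fun, double_parser = build_double_convnet_fingerprint_fun(
        num_hidden_features=[20, 20], fp_length=50)
    weights = 0.1 * npr.RandomState(0).randn(len(double_parser))
    return double_fp_fun(weights, smiles_pairs)   # shape (len(smiles_pairs), 100)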
def build_lxr_deep_net(net_params, fingerprint_func, fp_parser, fp_l2_penalty):
    """Like build_fingerprint_deep_net, but for binary classification:
    predictions are thresholded sigmoid outputs. (fp_l2_penalty is accepted
    for interface parity but not applied here.)"""
    net_loss_fun, net_pred_fun, net_parser = build_lxr_net(**net_params)

    combined_parser = WeightsParser()
    combined_parser.add_weights('fingerprint weights', (len(fp_parser),))
    combined_parser.add_weights('net weights', (len(net_parser),))

    def unpack_weights(weights):
        fingerprint_weights = combined_parser.get(weights, 'fingerprint weights')
        net_weights         = combined_parser.get(weights, 'net weights')
        return fingerprint_weights, net_weights

    def loss_fun(weights, smiles, targets):
        fingerprint_weights, net_weights = unpack_weights(weights)
        fingerprints = fingerprint_func(fingerprint_weights, smiles)
        return net_loss_fun(net_weights, fingerprints, targets)

    def pred_fun(weights, smiles):
        fingerprint_weights, net_weights = unpack_weights(weights)
        fingerprints = fingerprint_func(fingerprint_weights, smiles)
        # Map logits to probabilities, then threshold at 0.5 to get labels.
        predictions = sigmoid(net_pred_fun(net_weights, fingerprints))
        return np.around(predictions).astype(bool)

    return loss_fun, pred_fun, combined_parser
def build_lxr_net(layer_sizes, normalize, L2_reg, L1_reg=0.0, activation_function=relu,
                       nll_func=binary_classification_nll):
    """Just a plain old neural net, nothing to do with molecules.
    layer sizes includes the input size."""
    layer_sizes = layer_sizes + [1]

    parser = WeightsParser()
    for i, shape in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        parser.add_weights(('weights', i), shape)
        parser.add_weights(('biases', i), (1, shape[1]))

    def predictions(W_vect, X):
        cur_units = X
        for layer in range(len(layer_sizes) - 1):
            cur_W = parser.get(W_vect, ('weights', layer))
            cur_B = parser.get(W_vect, ('biases', layer))
            cur_units = np.dot(cur_units, cur_W) + cur_B
            if layer < len(layer_sizes) - 2:
                if normalize:
                    cur_units = batch_normalize(cur_units)
                cur_units = activation_function(cur_units)
        return cur_units[:, 0]

    def loss(w, X, targets):
        assert len(w) > 0
        # Note: unlike build_standard_net, no weight prior is applied here,
        # so the L2_reg and L1_reg arguments are currently unused.
        preds = predictions(w, X)
        return nll_func(preds, targets)

    return loss, predictions, parser
def build_fixed_convnet_fingerprint_fun(**kwargs):
    """Double convnet fingerprints with fixed random weights: a
    random-features baseline with no learnable fingerprint parameters."""
    fp_fun, parser = build_convnet_fingerprint_fun(**kwargs)

    random_weights = npr.RandomState(0).randn(len(parser))

    def double_fingerprint_fun(empty_weights, smiles_tuple):
        smiles1, smiles2 = zip(*smiles_tuple)
        fp1 = fp_fun(random_weights, smiles1)
        fp2 = fp_fun(random_weights, smiles2)
        return np.concatenate([fp1, fp2], axis=1)

    # The empty parser signals that there are no fingerprint weights to train.
    empty_parser = WeightsParser()
    return double_fingerprint_fun, empty_parser
def build_standard_net(layer_sizes,
                       normalize,
                       L2_reg=0.0,
                       L1_reg=0.0,
                       activation_function=relu,
                       nll_func=mean_squared_error,
                       num_outputs=1):
    """Just a plain old neural net, nothing to do with molecules.
    layer sizes includes the input size."""
    # How to change the output size here?
    layer_sizes = layer_sizes + [num_outputs]
    # Add number of outputs here

    parser = WeightsParser()
    for i, shape in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        parser.add_weights(('weights', i), shape)
        parser.add_weights(('biases', i), (1, shape[1]))

    def predictions(W_vect, X):
        cur_units = X
        for layer in range(len(layer_sizes) - 1):
            cur_W = parser.get(W_vect, ('weights', layer))
            cur_B = parser.get(W_vect, ('biases', layer))
            cur_units = np.dot(cur_units, cur_W) + cur_B
            if layer < len(layer_sizes) - 2:
                if normalize:
                    cur_units = batch_normalize(cur_units)
                cur_units = activation_function(cur_units)
        # Note: cur_units is not normalized here for the categorical_nll case;
        # categorical_nll is expected to handle normalization itself.
        return cur_units

    def loss(w, X, targets):
        assert len(w) > 0
        log_prior = -L2_reg * np.dot(w, w) / len(w) - L1_reg * np.mean(
            np.abs(w))
        preds = predictions(w, X)
        if nll_func == categorical_nll:
            return nll_func(preds, targets, num_outputs) - log_prior
        return nll_func(preds, targets) - log_prior

    return loss, predictions, parser
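# Multi-output sketch (not part of the original source), assuming the
# num_outputs variant of build_standard_net above is the one in scope:
# with num_outputs > 1, predictions keep their (N, num_outputs) shape.
def _demo_multi_output_net():
    loss_fun, pred_fun, parser = build_standard_net(
        layer_sizes=[10, 16], normalize=False, num_outputs=3)
    weights = 0.1 * npr.RandomState(0).randn(len(parser))
    X = npr.RandomState(1).randn(5, 10)
    print(pred_fun(weights, X).shape)   # -> (5, 3)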
def build_double_morgan_deep_net(fp_length, fp_depth, net_params):
    empty_parser = WeightsParser()
    morgan_fp_func = build_double_morgan_fingerprint_fun(fp_length, fp_depth)
    return build_fingerprint_deep_net(net_params, morgan_fp_func, empty_parser, 0)
def build_chemdec_deep_net(fp_length, fp_depth, net_params):
    empty_parser = WeightsParser()
    chemdec_func = build_chemdec_fun(fp_length, fp_depth)
    return build_fingerprint_deep_net(net_params, chemdec_func, empty_parser, 0)
def build_maccs_deep_net(fp_length, fp_depth, net_params):
    empty_parser = WeightsParser()
    maccs_fp_func = build_maccs_fingerprint_fun(fp_length, fp_depth)
    return build_fingerprint_deep_net(net_params, maccs_fp_func, empty_parser, 0)
def build_convnet_fingerprint_fun(num_hidden_features=[100, 100], fp_length=512,
                                  normalize=True, activation_function=relu,
                                  return_atom_activations=False):
    """Sets up functions to compute convnets over all molecules in a minibatch together."""

    # Specify weight shapes.
    parser = WeightsParser()
    # Prepend the raw atom-feature size, e.g. [62] + [20, 20, 20, 20].
    all_layer_sizes = [num_atom_features()] + num_hidden_features
    for layer in range(len(all_layer_sizes)):
        parser.add_weights(('layer output weights', layer), (all_layer_sizes[layer], fp_length))
        parser.add_weights(('layer output bias', layer),    (1, fp_length))

    # Consecutive (input, output) size pairs, e.g. [(62, 20), (20, 20), ...].
    in_and_out_sizes = zip(all_layer_sizes[:-1], all_layer_sizes[1:])
    for layer, (N_prev, N_cur) in enumerate(in_and_out_sizes):
        parser.add_weights(("layer", layer, "biases"), (1, N_cur))
        parser.add_weights(("layer", layer, "self filter"), (N_prev, N_cur))
        # One filter per atom degree (number of bonded neighbors), degrees = [0, 1, 2, 3, 4, 5].
        for degree in degrees:
            parser.add_weights(weights_name(layer, degree), (N_prev + num_bond_features(), N_cur))

    def update_layer(weights, layer, atom_features, bond_features, array_rep, normalize=False):
        def get_weights_func(degree):
            return parser.get(weights, weights_name(layer, degree))
        layer_bias         = parser.get(weights, ("layer", layer, "biases"))
        layer_self_weights = parser.get(weights, ("layer", layer, "self filter"))
        self_activations = np.dot(atom_features, layer_self_weights)
        neighbour_activations = matmult_neighbors(
            array_rep, atom_features, bond_features, get_weights_func)
        # Each atom's new features combine its own features with its neighbors'.
        total_activations = neighbour_activations + self_activations + layer_bias
        if normalize:
            total_activations = batch_normalize(total_activations)
        return activation_function(total_activations)

    def output_layer_fun_and_atom_activations(weights, smiles):
        """Computes layer-wise convolution, and returns a fixed-size output."""
        array_rep = array_rep_from_smiles(tuple(smiles))
        atom_features = array_rep['atom_features']   # (total atoms in batch, num atom features)
        bond_features = array_rep['bond_features']   # (total bonds in batch, num bond features)

        all_layer_fps = []
        atom_activations = []
        def write_to_fingerprint(atom_features, layer):
            cur_out_weights = parser.get(weights, ('layer output weights', layer))
            cur_out_bias    = parser.get(weights, ('layer output bias', layer))
            # Softmax over fingerprint indices gives a smooth, differentiable
            # analogue of the index operation in circular fingerprints.
            atom_outputs = softmax(cur_out_bias + np.dot(atom_features, cur_out_weights), axis=1)
            # Saved only for visualization; not needed for the fingerprint itself.
            atom_activations.append(atom_outputs)
            # Sum over all atoms within a molecule; array_rep['atom_list'] maps
            # each molecule to the indices of its atoms.
            layer_output = sum_and_stack(atom_outputs, array_rep['atom_list'])
            all_layer_fps.append(layer_output)

        num_layers = len(num_hidden_features)
        for layer in range(num_layers):
            write_to_fingerprint(atom_features, layer)
            atom_features = update_layer(weights, layer, atom_features, bond_features, array_rep,
                                         normalize=normalize)
        write_to_fingerprint(atom_features, num_layers)
        # The fingerprint is the sum of every layer's contribution.
        return sum(all_layer_fps), atom_activations, array_rep

    def output_layer_fun(weights, smiles):
        output, _, _ = output_layer_fun_and_atom_activations(weights, smiles)
        return output

    def compute_atom_activations(weights, smiles):
        _, atom_activations, array_rep = output_layer_fun_and_atom_activations(weights, smiles)
        return atom_activations, array_rep

    if return_atom_activations:
        return output_layer_fun, parser, compute_atom_activations
    else:
        return output_layer_fun, parser
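# Fingerprint sketch (not part of the original source): computes neural
# fingerprints for a minibatch of SMILES strings; RDKit must be available
# for the underlying array_rep_from_smiles call.
def _demo_convnet_fingerprints(smiles_list):
    fp_fun, fp_parser = build_convnet_fingerprint_fun(
        num_hidden_features=[20, 20], fp_length=50)
    weights = 0.1 * npr.RandomState(0).randn(len(fp_parser))
    return fp_fun(weights, smiles_list)   # shape (len(smiles_list), 50)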