Example #1
0
def main():
    """Parse a fixed sample program and print the tree and its root details.

    Loads the grammar from a hard-coded relative path, parses one hard-coded
    program string, takes the first tree returned by the parser and prints
    the tree followed by the symbol and rule information of its root.
    """
    grammar_path = '../../dropbox/context_free_grammars/prog_leftskew.grammar'
    program = 'v1=sin(v0);v2=v0*4;v3=v1/v2;v4=cos(v0);v5=v0*3;v6=sin(v1);v7=v3-v6;v8=v7+v5;v9=v8+v4;return:v9'

    cfg = parser.Grammar(grammar_path)
    trees = parser.parse(program, cfg)
    root = trees[0]

    print('(ugly) tree:')
    print(root)
    print()

    print('for root:')

    # Symbol-level information of the root node.
    symbol_template = 'symbol is %s, is it non-terminal = %s, it\' value is %s (of type %s)'
    symbol_fields = (
        root.symbol,
        isinstance(root, parser.Nonterminal),
        root.symbol.symbol(),
        type(root.symbol.symbol()),
    )
    print(symbol_template % symbol_fields)

    # Production rule applied at the root node.
    rule_template = (
        'rule is %s, its left side is %s (of type %s), its right side is %s, a tuple '
        'which each element can be either str (for terminal) or Nonterminal (for nonterminal)'
    )
    rule_fields = (
        root.rule,
        root.rule.lhs(),
        type(root.rule.lhs()),
        root.rule.rhs(),
    )
    print(rule_template % rule_fields)
Example #2
0
    def encode(self, chunk, use_random=False):
        """
        Encode a batch of molecules into latent-space means.

        Args:
            chunk: a non-empty list of `n` items. When the first item is a
                str, the whole list is treated as SMILES strings and parsed
                with `parse(chunk, self.grammar)`; otherwise the list is used
                as-is as the list of CFG trees.
            use_random: accepted but not used by this method.

        Returns:
            A numpy array of dtype np.float32, of shape (n, latent_dim)
            Note: Each row should be the *mean* of the latent space distribution rather than a sampled point from that distribution.
            (It can be anything as long as it fits what self.decode expects)
        """
        # The former per-item parsing loop was removed: its result was always
        # overwritten below, so it only parsed the batch a second time (and
        # ran the SMILES parser even on inputs that are not strings).
        if isinstance(chunk[0], str):
            cfg_tree_list = parse(chunk, self.grammar)
        else:
            cfg_tree_list = chunk

        # Only the one-hot encoding is needed here; the masks are discarded.
        onehot, _ = batch_make_att_masks(cfg_tree_list,
                                         self.tree_decoder,
                                         self.onehot_walker,
                                         dtype=np.float32)

        # Swap the last two axes before handing the batch to the encoder.
        x_inputs = np.transpose(onehot, [0, 2, 1])

        x_inputs = paddle.to_tensor(x_inputs)
        # The encoder returns a pair; only the first element (the mean) is used.
        z_mean, _ = self.ae.encoder(x_inputs)

        return z_mean.numpy()
Example #3
0
  def save_map(self, n=None, notify=True):
    """Write the current map state to ``saves/<name>/main.pkl``.

    Args:
      n: name of the world to save under. When truthy it also replaces the
        stored name (``self._NAME``); otherwise the stored name is used.
      notify: when true, post a "Saved" notification through
        ``self.notify`` and print ``saved``.
    """
    if n:
      self._NAME = n
    else:
      n = self._NAME

    # create world folder if needed; makedirs also creates a missing
    # "saves" parent directory, which os.mkdir would fail on
    p = os.path.join("saves", n)
    if not os.path.exists(p):
      os.makedirs(p)

    # construct structure to save (the tile list is shallow-copied)
    t = self.tiles[:]
    data_struct = {
      "map": t,
      "width": self.w,
      "height": self.h,
      "sun_pos": self.sun_pos,
      "diagnostics": self._DIAGNOSTIC,
      "inventory": self.inventory.slots
    }

    # parse data
    # NOTE(review): cfg_parser.parse presumably returns text, since the
    # result is written to a file opened in text mode -- confirm.
    ps = cfg_parser.parse(data_struct)

    # save it
    with open(os.path.join(p, "main.pkl"), "w") as f:
      f.write(ps)

    if notify:
      self.notify.msg("Saved", "Map '"+n+"' has been saved.")
      # Call form prints the same text on Python 2 and is valid on Python 3.
      print("saved")
Example #4
0
def main():
    """Demo: parse one hard-coded program and describe the root of its tree.

    The grammar is read from a fixed relative path. The first tree returned
    by the parser is printed, then the root's symbol and rule are printed
    together with the types of their components.
    """
    cfg_grammar_file = '../../dropbox/context_free_grammars/prog_leftskew.grammar'
    source_text = (
        'v1=sin(v0);v2=v0*4;v3=v1/v2;v4=cos(v0);v5=v0*3;v6=sin(v1);v7=v3-v6;v8=v7+v5;v9=v8+v4;return:v9'
    )

    loaded_grammar = parser.Grammar(cfg_grammar_file)
    parsed = parser.parse(source_text, loaded_grammar)
    node = parsed[0]

    for line in ('(ugly) tree:', node):
        print(line)
    print()

    print('for root:')

    # First report: the node's symbol and the value it wraps.
    symbol_args = (
        node.symbol,
        isinstance(node, parser.Nonterminal),
        node.symbol.symbol(),
        type(node.symbol.symbol()),
    )
    print(
        'symbol is %s, is it non-terminal = %s, it\' value is %s (of type %s)'
        % symbol_args
    )

    # Second report: the rule expanded at this node and both of its sides.
    rule_args = (
        node.rule,
        node.rule.lhs(),
        type(node.rule.lhs()),
        node.rule.rhs(),
    )
    print(
        'rule is %s, its left side is %s (of type %s), its right side is %s, a tuple '
        'which each element can be either str (for terminal) or Nonterminal (for nonterminal)'
        % rule_args
    )
Example #5
0
    def save_map(self, n=None, notify=True):
        """Write the current map state to ``saves/<name>/main.pkl``.

        Args:
            n: name of the world to save under. When truthy it also replaces
                the stored name (``self._NAME``); otherwise the stored name
                is used.
            notify: when true, post a "Saved" notification through
                ``self.notify`` and print ``saved``.
        """
        if n:
            self._NAME = n
        else:
            n = self._NAME

        # create world folder if needed; makedirs also creates a missing
        # "saves" parent directory, which os.mkdir would fail on
        p = os.path.join("saves", n)
        if not os.path.exists(p):
            os.makedirs(p)

        # construct structure to save (the tile list is shallow-copied)
        t = self.tiles[:]
        data_struct = {
            "map": t,
            "width": self.w,
            "height": self.h,
            "sun_pos": self.sun_pos,
            "diagnostics": self._DIAGNOSTIC,
            "inventory": self.inventory.slots
        }

        # parse data
        # NOTE(review): cfg_parser.parse presumably returns text, since the
        # result is written to a file opened in text mode -- confirm.
        ps = cfg_parser.parse(data_struct)

        # save it
        with open(os.path.join(p, "main.pkl"), "w") as f:
            f.write(ps)

        if notify:
            self.notify.msg("Saved", "Map '" + n + "' has been saved.")
            # Call form prints the same text on Python 2 and is valid on Python 3.
            print("saved")
def parse_single(smiles, grammar):
    """
    Parse one SMILES string and convert its parse tree to a molecule tree.

    The parser must return a list holding exactly one tree; that tree is
    passed to `AnnotatedTree2MolTree` and the result is returned.
    """
    parsed = parser.parse(smiles, grammar)
    assert isinstance(parsed, list) and len(parsed) == 1
    return AnnotatedTree2MolTree(parsed[0])
Example #7
0
def parse_smiles_with_cfg(smiles_file, grammar_file):
    """Parse every line of `smiles_file` into a molecule tree.

    Each line is stripped, parsed with the grammar loaded from
    `grammar_file`, and must yield exactly one parse tree, which is
    converted with `AnnotatedTree2MolTree`. Returns the converted trees
    in file order.
    """
    grammar = parser.Grammar(grammar_file)

    mol_trees = []
    with open(smiles_file, 'r') as handle:
        for line in tqdm(handle):
            parsed = parser.parse(line.strip(), grammar)
            assert isinstance(parsed, list) and len(parsed) == 1
            mol_trees.append(AnnotatedTree2MolTree(parsed[0]))

    return mol_trees
Example #8
0
def parse_smiles_with_cfg(smiles_file, grammar_file):
    """Parse every line of `smiles_file` into a molecule tree.

    Args:
        smiles_file: path of a text file with one SMILES string per line.
        grammar_file: path of the grammar file used to build the parser grammar.

    Returns:
        A list with one `AnnotatedTree2MolTree` result per line, in file order.

    Raises:
        AssertionError: if the parser does not return a list holding exactly
            one tree for some line.
    """
    # Honor the caller's grammar_file; it used to be ignored in favour of
    # the global cmd_args.grammar_file.
    grammar = parser.Grammar(grammar_file)

    cfg_tree_list = []
    with open(smiles_file, 'r') as f:
        for row in tqdm(f):
            smiles = row.strip()
            ts = parser.parse(smiles, grammar)
            assert isinstance(ts, list) and len(ts) == 1
            n = AnnotatedTree2MolTree(ts[0])
            cfg_tree_list.append(n)

    return cfg_tree_list
Example #9
0
def process_chunk(program_list):
    """Turn a list of program strings into one-hot encodings and masks.

    The grammar is loaded from `cmd_args.grammar_file` on every call. Each
    program must parse to exactly one tree, which is converted with
    `AnnotatedTree2ProgTree`. The converted trees are then passed to
    `batch_make_att_masks` with dtype np.byte.

    Returns:
        The `(onehot, masks)` pair produced by `batch_make_att_masks`.
    """
    grammar = parser.Grammar(cmd_args.grammar_file)

    prog_trees = []
    for source in program_list:
        parsed = parser.parse(source, grammar)
        assert isinstance(parsed, list) and len(parsed) == 1
        prog_trees.append(AnnotatedTree2ProgTree(parsed[0]))

    # Built in the same order as before: walker first, then the decoder.
    onehot_walker = ProgramOnehotBuilder()
    decoder = ProgTreeDecoder()
    onehot, masks = batch_make_att_masks(
        prog_trees, decoder, onehot_walker, dtype=np.byte
    )

    return (onehot, masks)
Example #10
0
def process_chunk(smiles_list):
    """Turn a list of SMILES strings into one-hot encodings and masks.

    The grammar is loaded from `cmd_args.grammar_file` on every call. Each
    SMILES string must parse to exactly one tree, which is converted with
    `AnnotatedTree2MolTree`. The converted trees are then passed to
    `batch_make_att_masks` with dtype np.byte.

    Returns:
        The `(onehot, masks)` pair produced by `batch_make_att_masks`.
    """
    grammar = parser.Grammar(cmd_args.grammar_file)

    mol_trees = []
    for item in smiles_list:
        parsed = parser.parse(item, grammar)
        assert isinstance(parsed, list) and len(parsed) == 1
        mol_trees.append(AnnotatedTree2MolTree(parsed[0]))

    # Built in the same order as before: walker first, then the decoder.
    onehot_walker = OnehotBuilder()
    decoder = create_tree_decoder()
    onehot, masks = batch_make_att_masks(
        mol_trees, decoder, onehot_walker, dtype=np.byte
    )

    return (onehot, masks)
Example #11
0
def parse_smiles_with_cfg(smiles_file, grammar_file):
    """Parse a SMILES file and check that every tree round-trips.

    Args:
        smiles_file: path of a text file with one SMILES string per line.
        grammar_file: path of the grammar file used to build the parser grammar.

    Returns:
        A tuple `(smiles_list, cfg_tree_list, annotated_trees)` of three
        parallel lists: the stripped SMILES strings, the
        `AnnotatedTree2MolTree` results, and the raw parse trees.

    Raises:
        AssertionError: if a line does not parse to exactly one tree, or if
            `get_smiles_from_tree` does not reproduce the input string.
    """
    # Honor the caller's grammar_file; it used to be ignored in favour of
    # the global cmd_args.grammar_file.
    grammar = parser.Grammar(grammar_file)

    smiles_list = []
    cfg_tree_list = []
    annotated_trees = []
    with open(smiles_file, 'r') as f:
        for row in tqdm(f):
            smiles = row.strip()
            smiles_list.append(smiles)
            ts = parser.parse(smiles, grammar)
            assert isinstance(ts, list) and len(ts) == 1
            annotated_trees.append(ts[0])
            n = AnnotatedTree2MolTree(ts[0])
            cfg_tree_list.append(n)

            # Round-trip check: the converted tree must give back the input.
            st = get_smiles_from_tree(n)
            assert st == smiles

    return (smiles_list, cfg_tree_list, annotated_trees)
Example #12
0
def parse_smiles_with_cfg(smiles_file, grammar_file):
    """Parse a SMILES file and check that every tree round-trips.

    Args:
        smiles_file: path of a text file with one SMILES string per line.
        grammar_file: path of the grammar file used to build the parser grammar.

    Returns:
        A tuple `(smiles_list, cfg_tree_list, annotated_trees)` of three
        parallel lists: the stripped SMILES strings, the
        `AnnotatedTree2MolTree` results, and the raw parse trees.

    Raises:
        AssertionError: if a line does not parse to exactly one tree, or if
            `get_smiles_from_tree` does not reproduce the input string.
    """
    # Honor the caller's grammar_file; it used to be ignored in favour of
    # the global cmd_args.grammar_file.
    grammar = parser.Grammar(grammar_file)

    smiles_list = []
    cfg_tree_list = []
    annotated_trees = []
    with open(smiles_file, 'r') as f:
        for row in tqdm(f):
            smiles = row.strip()
            smiles_list.append(smiles)
            ts = parser.parse(smiles, grammar)
            assert isinstance(ts, list) and len(ts) == 1
            annotated_trees.append(ts[0])
            n = AnnotatedTree2MolTree(ts[0])
            cfg_tree_list.append(n)

            # Round-trip check: the converted tree must give back the input.
            st = get_smiles_from_tree(n)
            assert st == smiles

    return (smiles_list, cfg_tree_list, annotated_trees)
Example #13
0
        for c in node.children:
            dfs_indices(c, result)


def AnnotatedTree2RuleIndices(annotated_root):
    """Return the indices gathered by `dfs_indices` from the tree as a numpy array."""
    collected = []
    # dfs_indices fills the list in place while walking the tree.
    dfs_indices(annotated_root, collected)
    return np.array(collected)


def AnnotatedTree2Onehot(annotated_root, max_len):
    """Build a (DECISION_DIM, max_len) float32 one-hot matrix for a tree.

    Column j gets a 1.0 in the row given by the j-th rule index of the tree.
    Every column past the end of the index sequence gets a 1.0 in the last
    row, which acts as padding. Asserts that the sequence fits in `max_len`.
    """
    rule_ids = AnnotatedTree2RuleIndices(annotated_root)
    n_steps = len(rule_ids)
    assert n_steps <= max_len

    onehot = np.zeros((DECISION_DIM, max_len), dtype=np.float32)
    onehot[rule_ids, np.arange(n_steps)] = 1.0
    onehot[-1, n_steps:] = 1.0  # padding

    return onehot


if __name__ == '__main__':
    # Smoke test: parse one short SMILES string and print its rule indices.
    smiles = 'OSC'
    grammar = cfg_parser.Grammar(cmd_args.grammar_file)

    parsed_trees = cfg_parser.parse(smiles, grammar)
    # The parser must return a list holding exactly one tree.
    assert isinstance(parsed_trees, list) and len(parsed_trees) == 1

    rule_indices = AnnotatedTree2RuleIndices(parsed_trees[0])
    print(rule_indices)
Example #14
0
def parse(chunk, grammar):
    """Parse `chunk` in parallel batches of 100 items and flatten the results.

    Each slice of up to 100 items is handed to `parse_many` through joblib
    (`n_jobs=-1`); the per-batch results are concatenated in order.
    """
    batch_size = 100
    jobs = (
        delayed(parse_many)(chunk[start: start + batch_size], grammar)
        for start in range(0, len(chunk), batch_size)
    )
    per_batch = Parallel(n_jobs=-1)(jobs)

    flattened = []
    for batch_result in per_batch:
        flattened.extend(batch_result)
    return flattened

import cPickle as cp

from tqdm import tqdm

if __name__ == '__main__':
    # Convert every SMILES line of the input file to a molecule tree and
    # pickle the trees one after another into '<input minus extension>.cfg_dump'.
    smiles_file = cmd_args.smiles_file
    fname = '.'.join(smiles_file.split('.')[0:-1]) + '.cfg_dump'
    grammar = parser.Grammar(cmd_args.grammar_file)

    # load smiles strings as a list, one stripped entry per line
    with open(smiles_file, 'r') as f:
        smiles = [row.strip() for row in f]

    # Batch alternative, kept for reference:
    # cfg_tree_list = parse(smiles, grammar)
    # cp.dump(cfg_tree_list, fout, cp.HIGHEST_PROTOCOL)

    # The context manager closes the dump file even when parsing or one of
    # the asserts fails part-way through.
    with open(fname, 'wb') as fout:
        for item in tqdm(smiles):
            ts = parser.parse(item, grammar)
            assert isinstance(ts, list) and len(ts) == 1
            n = AnnotatedTree2MolTree(ts[0])
            # One pickle record per molecule; readers must load repeatedly.
            cp.dump(n, fout, cp.HIGHEST_PROTOCOL)
Example #15
0
        flatten = h3.view(x_cpu.shape[0], -1)
        h = self.w1(flatten)
        h = F.relu(h)

        z_mean = self.mean_w(h)
        z_log_var = self.log_var_w(h)
        
        return (z_mean, z_log_var)

if __name__ == '__main__':
    # Smoke test: encode three sample strings with a fresh CNNEncoder and
    # print the size of the first returned tensor.

    # Raw strings keep the literal backslashes without relying on the
    # invalid escape sequences '\S' and '\P'; the string values are unchanged.
    smiles_list = [r'N\SCPP#IOS', r'CP\P', 'PINI']

    cfg_trees = []
    cfg_onehots = []
    grammar = parser.Grammar(cmd_args.grammar_file)
    for smiles in smiles_list:
        ts = parser.parse(smiles, grammar)
        assert isinstance(ts, list) and len(ts) == 1
        n = AnnotatedTree2MolTree(ts[0])
        cfg_trees.append(n)
        # One-hot matrices are built from the raw parse tree, padded to 50 columns.
        cfg_onehots.append(AnnotatedTree2Onehot(ts[0], 50))

    # Stack the per-molecule matrices along a new leading batch axis.
    cfg_onehots = np.stack(cfg_onehots, axis=0)

    encoder = CNNEncoder(max_len=50, latent_dim=64)
    if cmd_args.mode == 'gpu':
        encoder.cuda()
    z = encoder(cfg_onehots)
    print(z[0].size())
Example #16
0
        result.append(idx)
        for c in node.children:
            dfs_indices(c, result)

def AnnotatedTree2RuleIndices(annotated_root):
    """Collect the indices produced by `dfs_indices` for the tree into a numpy array."""
    index_buffer = []
    # dfs_indices appends to the buffer in place during its traversal.
    dfs_indices(annotated_root, index_buffer)
    return np.array(index_buffer)

def AnnotatedTree2Onehot(annotated_root, max_len):
    """Encode a tree's rule indices as a (DECISION_DIM, max_len) float32 matrix.

    For each position j in the index sequence, the entry at (index_j, j) is
    set to 1.0. All remaining columns are marked in the last row as padding.
    Asserts that the sequence is no longer than `max_len`.
    """
    indices = AnnotatedTree2RuleIndices(annotated_root)
    used = len(indices)
    assert used <= max_len

    matrix = np.zeros((DECISION_DIM, max_len), dtype=np.float32)
    matrix[indices, np.arange(used)] = 1.0
    matrix[-1, used:] = 1.0  # padding

    return matrix

if __name__ == '__main__':
    # Smoke test: parse one short SMILES string and print its rule indices.
    smiles = 'OSC'
    grammar = cfg_parser.Grammar(cmd_args.grammar_file)

    parse_result = cfg_parser.parse(smiles, grammar)
    # The parser must return a list holding exactly one tree.
    assert isinstance(parse_result, list) and len(parse_result) == 1

    indices = AnnotatedTree2RuleIndices(parse_result[0])
    print(indices)
Example #17
0
def parse_single(program, grammar):
    """Parse one program string and convert its parse tree to a program tree.

    The parser must return a list holding exactly one tree; that tree is
    passed to `AnnotatedTree2ProgTree` and the result is returned.
    """
    parsed = parser.parse(program, grammar)
    assert isinstance(parsed, list) and len(parsed) == 1
    return AnnotatedTree2ProgTree(parsed[0])
Example #18
0
    for _0 in result_list:
        for  _1 in _0:
            return_value.append(_1)

    return return_value


if __name__ == '__main__':
    # Convert every SMILES line of the input file to a molecule tree and
    # pickle the trees one after another into '<input minus extension>.cfg_dump'.
    smiles_file = cmd_args.smiles_file
    fname = '.'.join(smiles_file.split('.')[0:-1]) + '.cfg_dump'
    grammar = parser.Grammar(cmd_args.grammar_file)

    # load smiles strings as a list, one stripped entry per line
    with open(smiles_file, 'r') as f:
        smiles = [row.strip() for row in f]

    # Batch alternative, kept for reference:
    # cfg_tree_list = parse(smiles, grammar)
    # cp.dump(cfg_tree_list, fout, cp.HIGHEST_PROTOCOL)

    # The context manager closes the dump file even when parsing or one of
    # the asserts fails part-way through.
    with open(fname, 'wb') as fout:
        for item in tqdm(smiles):
            ts = parser.parse(item, grammar)
            assert isinstance(ts, list) and len(ts) == 1
            n = AnnotatedTree2MolTree(ts[0])
            # One pickle record per molecule; readers must load repeatedly.
            pickle.dump(n, fout, pickle.HIGHEST_PROTOCOL)
Example #19
0
        h = self.w1(flatten)
        h = F.relu(h)

        z_mean = self.mean_w(h)
        z_log_var = self.log_var_w(h)

        return (z_mean, z_log_var)


if __name__ == '__main__':
    # Smoke test: encode three sample strings with a fresh CNNEncoder and
    # print the size of the first returned tensor.

    # Raw strings keep the literal backslashes without relying on the
    # invalid escape sequences '\S' and '\P'; the string values are unchanged.
    smiles_list = [r'N\SCPP#IOS', r'CP\P', 'PINI']

    cfg_trees = []
    cfg_onehots = []
    grammar = parser.Grammar(cmd_args.grammar_file)
    for smiles in smiles_list:
        ts = parser.parse(smiles, grammar)
        assert isinstance(ts, list) and len(ts) == 1
        n = AnnotatedTree2MolTree(ts[0])
        cfg_trees.append(n)
        # One-hot matrices are built from the raw parse tree, padded to 50 columns.
        cfg_onehots.append(AnnotatedTree2Onehot(ts[0], 50))

    # Stack the per-molecule matrices along a new leading batch axis.
    cfg_onehots = np.stack(cfg_onehots, axis=0)

    encoder = CNNEncoder(max_len=50, latent_dim=64)
    if cmd_args.mode == 'gpu':
        encoder.cuda()
    z = encoder(cfg_onehots)
    print(z[0].size())
Example #20
0
#!/usr/bin/env python3

import cfg_parser as parser


# Demo script: parse one molecule string with the ZINC grammar and print the
# resulting tree together with details of its root node.
info_folder = '../../dropbox/context_free_grammars'
grammar = parser.Grammar(info_folder + '/mol_zinc.grammar')
parsed_trees = parser.parse('ClI=I=S(CBI)(-CN(C-N(N-C-F))I(S-I)C-C=I)', grammar)
root = parsed_trees[0]

print('(ugly) tree:')
print(root)
print()


print('for root:')

# Symbol-level information of the root node.
symbol_template = 'symbol is %s, is it non-terminal = %s, it\' value is %s (of type %s)'
symbol_fields = (
    root.symbol,
    isinstance(root, parser.Nonterminal),
    root.symbol.symbol(),
    type(root.symbol.symbol()),
)
print(symbol_template % symbol_fields)

# Production rule applied at the root node.
rule_template = (
    'rule is %s, its left side is %s (of type %s), its right side is %s, a tuple '
    'which each element can be either str (for terminal) or Nonterminal (for nonterminal)'
)
rule_fields = (
    root.rule,
    root.rule.lhs(),
    type(root.rule.lhs()),
    root.rule.rhs(),
)
print(rule_template % rule_fields)
Example #21
0
def parse_single(smiles, grammar):
    """Parse one SMILES string and return its molecule tree.

    Asserts that the parser returns a list holding exactly one tree, then
    converts that tree with `AnnotatedTree2MolTree`.
    """
    candidates = parser.parse(smiles, grammar)
    assert isinstance(candidates, list) and len(candidates) == 1
    only_tree = candidates[0]
    return AnnotatedTree2MolTree(only_tree)