Ejemplo n.º 1
0
def run(layer_name):
    """Run a node-typing experiment using the graph layer ``layer_name``.

    Builds ``esp.data.zinc(first=1000)``, shuffles it, applies the
    ``gaff-1.81`` legacy force field to the dataset in place, splits it with
    the partition spec ``[4, 1]``, saves both partitions, and trains a graph
    network with a ``NodeTyping`` readout through ``esp.TrainAndTest``.

    Parameters
    ----------
    layer_name : str
        Forwarded to ``esp.nn.layers.dgl_legacy.gn`` and used as the prefix
        of the two pickled reference-graph files.

    Side effects
    ------------
    Writes ``ds_tr.th``, ``ds_te.th``, ``<layer_name>_ref_g_test.th`` and
    ``<layer_name>_ref_g_training.th`` relative to the working directory and
    prints the markdown report of the results.
    """
    import pickle

    dataset = esp.data.zinc(first=1000).shuffle()

    # typing step; in_place=True modifies the original data
    legacy_ff = esp.graphs.legacy_force_field.LegacyForceField('gaff-1.81')
    dataset.apply(legacy_ff, in_place=True)

    # NOTE (kept from the original author): the torch-generic splitting
    # function is avoided because it needs the explicit volume of each
    # partition and is inconsistent with the __getitem__ specification.
    train_part, test_part = dataset.split([4, 1])

    train_part.save('ds_tr.th')
    test_part.save('ds_te.th')

    # 'graph' views turn the partitions into iterables of batches; the test
    # partition is served as a single batch holding all of its entries
    train_view = train_part.view('graph', batch_size=20)
    test_view = test_part.view('graph', batch_size=len(test_part))

    # representation: the requested layer, stacked three times at width 32
    graph_layer = esp.nn.layers.dgl_legacy.gn(layer_name)
    hidden_config = [32, 'leaky_relu', 32, 'leaky_relu', 32, 'leaky_relu']
    representation = esp.nn.Sequential(graph_layer, hidden_config)

    # readout: in_features matches the last width of hidden_config
    readout = esp.nn.readout.node_typing.NodeTyping(
        in_features=32,
        n_classes=100,  # not too many elements here I think?
    )

    model = torch.nn.Sequential(representation, readout)

    exp = esp.TrainAndTest(
        ds_tr=train_view,
        ds_te=test_view,
        net=model,
        metrics_te=[esp.metrics.TypingAccuracy()],
        n_epochs=500,
    )
    results = exp.run()

    print(esp.app.report.markdown(results))

    # pickle the reference graphs collected by the experiment, test first
    dump_plan = (
        ("_ref_g_test.th", "ref_g_test"),
        ("_ref_g_training.th", "ref_g_training"),
    )
    for suffix, attribute in dump_plan:
        with open(layer_name + suffix, "wb") as f_handle:
            pickle.dump(getattr(exp, attribute), f_handle)
Ejemplo n.º 2
0
def run():
    """Fit a Janossy-pooling model to ``smirnoff99Frosst`` parameters.

    Builds ``esp.data.zinc(first=1000)``, applies the force field's
    ``parametrize`` method to the dataset in place, splits it with the
    partition spec ``[4, 1]``, and runs ``esp.TrainAndTest`` for 10000
    epochs. Takes no arguments, returns nothing, and prints the markdown
    report of the results.
    """
    dataset = esp.data.zinc(first=1000)

    # parametrization step; in_place=True modifies the original data
    force_field = esp.graphs.legacy_force_field.LegacyForceField(
        'smirnoff99Frosst')
    dataset.apply(force_field.parametrize, in_place=True)

    # NOTE (kept from the original author): the torch-generic splitting
    # function is avoided because it needs the explicit volume of each
    # partition and is inconsistent with the __getitem__ specification.
    #
    # NOTE(review): the second partition is never used below -- the training
    # view is passed as both ds_tr and ds_te. Confirm this is intended.
    train_part, _held_out = dataset.split([4, 1])

    # a 'graph' view turns the partition into an iterable of batches
    train_batches = train_part.view('graph', batch_size=2)

    # representation: GraphConv layers with tanh activations, width 32
    representation = esp.nn.Sequential(
        esp.nn.layers.dgl_legacy.gn('GraphConv'),
        [32, 'tanh', 32, 'tanh', 32, 'tanh'],
    )

    # readout: in_features matches the last width of the representation
    readout = esp.nn.readout.janossy.JanossyPooling(
        config=[32, 'tanh'],
        in_features=32,
    )

    model = torch.nn.Sequential(representation, readout)

    # training metric compares k_ref and k at level n2 with MSE;
    # test metric compares the same pair at level n3 with RMSE
    train_metric = esp.metrics.GraphMetric(
        between=['k_ref', 'k'],
        level='n2',
        base_metric=torch.nn.MSELoss(),
    )
    test_metric = esp.metrics.GraphMetric(
        between=['k_ref', 'k'],
        level='n3',
        base_metric=esp.metrics.rmse,
    )

    exp = esp.TrainAndTest(
        ds_tr=train_batches,
        ds_te=train_batches,
        net=model,
        metrics_tr=[train_metric],
        metrics_te=[test_metric],
        n_epochs=10000,
    )
    results = exp.run()

    print(esp.app.report.markdown(results))
Ejemplo n.º 3
0
def run(args):
    """Train a Janossy-pooling model on force-field parameters and save it.

    The dataset named by ``args.data`` is parametrized in place with the
    legacy force field ``args.forcefield``, split according to
    ``args.partition`` and batched. A graph network with a Janossy readout
    emitting ``k`` and ``eq`` for the 2- and 3-body levels is trained with
    ``esp.TrainAndTest``.

    Parameters
    ----------
    args : namespace-like
        Attributes read: ``data``, ``first``, ``forcefield``, ``partition``
        (colon-separated integers, e.g. ``"4:1"``), ``batch_size``,
        ``layer``, ``config``, ``janossy_config``, ``n_epochs``, ``out``.

    Side effects
    ------------
    Prints the markdown report and creates the directory ``args.out``
    (``os.mkdir`` raises ``FileExistsError`` if it already exists) holding
    ``architecture.txt``, ``result_table.md``, one ``.npy`` file per curve
    and the pickled reference graphs.
    """
    import os
    import pickle

    # dataset: look the constructor up by name on esp.data
    dataset_factory = getattr(esp.data, args.data)
    dataset = dataset_factory(first=args.first)

    # parametrize every entry with the requested legacy force field
    force_field = esp.graphs.legacy_force_field.LegacyForceField(
        args.forcefield)
    dataset = dataset.apply(force_field.parametrize, in_place=True)

    # split, e.g. "4:1" -> [4, 1]
    ratios = list(map(int, args.partition.split(":")))
    train_part, test_part = dataset.split(ratios)

    # batch
    train_view = train_part.view("graph", batch_size=args.batch_size)
    test_view = test_part.view("graph", batch_size=args.batch_size)

    # representation
    graph_layer = esp.nn.layers.dgl_legacy.gn(args.layer)
    representation = esp.nn.Sequential(graph_layer, config=args.config)

    # the readout width is the last integer entry of the config
    int_entries = [entry for entry in args.config if isinstance(entry, int)]
    units = int_entries[-1]

    readout = esp.nn.readout.janossy.JanossyPooling(
        in_features=units,
        config=args.janossy_config,
        out_features={
            2: ["k", "eq"],
            3: ["k", "eq"],
        },
    )

    model = torch.nn.Sequential(representation, readout)

    # training: L1 loss between each parameter and its "_ref" counterpart
    metrics_tr = []
    for param in ["k", "eq"]:
        for term in ["n2", "n3"]:
            metrics_tr.append(
                esp.metrics.GraphMetric(
                    base_metric=torch.nn.L1Loss(),
                    between=[param, param + "_ref"],
                    level=term,
                ))

    # test: RMSE and R2 for the same (parameter, level) combinations
    metrics_te = []
    for param in ["k", "eq"]:
        for term in ["n2", "n3"]:
            for base_metric in [esp.metrics.rmse, esp.metrics.r2]:
                metrics_te.append(
                    esp.metrics.GraphMetric(
                        base_metric=base_metric,
                        between=[param, param + "_ref"],
                        level=term,
                    ))

    exp = esp.TrainAndTest(
        ds_tr=train_view,
        ds_te=test_view,
        net=model,
        metrics_tr=metrics_tr,
        metrics_te=metrics_te,
        n_epochs=args.n_epochs,
    )
    results = exp.run()

    print(esp.app.report.markdown(results))

    # ---- persist everything under args.out ----
    os.mkdir(args.out)

    with open(args.out + "/architecture.txt", "w") as f_handle:
        f_handle.write(str(exp))

    with open(args.out + "/result_table.md", "w") as f_handle:
        f_handle.write(esp.app.report.markdown(results))

    # one .npy file per curve; the file name joins the parts of the key
    for spec, curve in esp.app.report.curve(results).items():
        np.save(args.out + "/" + "_".join(spec) + ".npy", curve)

    # reference graphs collected by the experiment, test first
    dump_plan = (
        ("/ref_g_test.th", "ref_g_test"),
        ("/ref_g_training.th", "ref_g_training"),
    )
    for file_name, attribute in dump_plan:
        with open(args.out + file_name, "wb") as f_handle:
            pickle.dump(getattr(exp, attribute), f_handle)
Ejemplo n.º 4
0
def run(args):
    """Run a configurable typing / parametrization experiment.

    The dataset named by ``args.data`` is transformed in place by the
    method ``args.operation`` of the legacy force field ``args.forcefield``,
    split according to ``args.partition``, batched, and used to train a
    graph network through ``esp.TrainAndTest``. The markdown report of the
    results is printed.

    Parameters
    ----------
    args : namespace-like
        Attributes read: ``data``, ``first``, ``forcefield``, ``operation``,
        ``partition`` (colon-separated integers, e.g. ``"4:1"``),
        ``batch_size``, ``layer``, ``config``, ``readout``
        (``"node_typing"`` or ``"janossy"``), ``n_classes`` (node typing
        only), ``janossy_config`` (janossy only), ``training_metrics`` and
        ``test_metrics`` (names of callables on ``esp.metrics``, each
        instantiated without arguments), ``n_epochs``.

    Raises
    ------
    ValueError
        If ``args.readout`` is not one of the supported readout names.
    """
    # Validate the readout choice before any expensive work. Previously an
    # unknown value left ``readout`` unbound and surfaced only as an
    # UnboundLocalError after the dataset had been loaded and transformed.
    supported_readouts = ("node_typing", "janossy")
    if args.readout not in supported_readouts:
        raise ValueError(
            "unsupported readout {!r}; expected one of {}".format(
                args.readout, supported_readouts))

    # define data
    data = getattr(esp.data, args.data)(first=args.first)

    # get force field
    forcefield = esp.graphs.legacy_force_field.LegacyForceField(
        args.forcefield)

    # param / typing
    operation = getattr(forcefield, args.operation)

    # apply to dataset
    data = data.apply(operation, in_place=True)

    # split, e.g. "4:1" -> [4, 1]
    partition = [int(x) for x in args.partition.split(":")]
    ds_tr, ds_te = data.split(partition)

    # batch
    ds_tr = ds_tr.view("graph", batch_size=args.batch_size)
    ds_te = ds_te.view("graph", batch_size=args.batch_size)

    # layer
    layer = esp.nn.layers.dgl_legacy.gn(args.layer)

    # representation
    representation = esp.nn.Sequential(layer, config=args.config)

    # the readout width is the last integer entry of the config
    units = [x for x in args.config if isinstance(x, int)][-1]

    # readout (the value was validated at the top of the function)
    if args.readout == "node_typing":
        readout = esp.nn.readout.node_typing.NodeTyping(
            in_features=units, n_classes=args.n_classes)
    else:  # "janossy"
        readout = esp.nn.readout.janossy.JanossyPooling(
            in_features=units, config=args.janossy_config)

    net = torch.nn.Sequential(representation, readout)

    # Build each metric list exactly once; the earlier version built these
    # lists, discarded them, and rebuilt identical ones inline.
    training_metrics = [
        getattr(esp.metrics, metric)() for metric in args.training_metrics
    ]
    test_metrics = [
        getattr(esp.metrics, metric)() for metric in args.test_metrics
    ]

    exp = esp.TrainAndTest(
        ds_tr=ds_tr,
        ds_te=ds_te,
        net=net,
        metrics_tr=training_metrics,
        metrics_te=test_metrics,
        n_epochs=args.n_epochs,
    )

    results = exp.run()

    print(esp.app.report.markdown(results))
Ejemplo n.º 5
0
def run(args):
    """Train an energy / force-matching model on ``ds_lean.th`` and save it.

    Loads the first 100 entries of the pre-processed dataset ``ds_lean.th``,
    splits them with the partition spec ``[4, 1]``, and trains a graph
    network (representation, Janossy readout, graph-level readout, geometry
    and energy layers) through ``esp.TrainAndTest`` on ``cuda:0``.

    Provenance of ``ds_lean.th``: an earlier revision of this function
    carried disabled code that produced it from ``ds.th`` by (1) subtracting
    ``esp.data.utils.sum_offsets`` of the atomic numbers from ``u_ref``,
    (2) keeping only snapshots whose ``u_ref`` is within 0.01 (hartree,
    per the original comment) of the per-graph minimum, (3) dropping graphs
    with 100 or fewer remaining snapshots, (4) subsampling 100 snapshots per
    graph without replacement, and (5) applying
    ``esp.data.md.subtract_nonbonded_force``.

    Parameters
    ----------
    args : namespace-like
        Attributes read: ``layer``, ``config``, ``janossy_config``,
        ``graph_act``, ``weight``, ``n_epochs``, ``lr``, ``out``. Entries of
        ``config`` / ``janossy_config`` may be ints or strings; digit
        strings are converted to ints.

    Side effects
    ------------
    Prints the markdown report and creates the directory ``args.out``
    (``os.mkdir`` raises ``FileExistsError`` if it already exists) holding
    ``net.th``, ``architecture.txt``, ``result_table.md``, one ``.npy`` file
    per curve and the pickled reference graphs.
    """
    import os
    import pickle

    ds = esp.data.dataset.GraphDataset().load('ds_lean.th')[:100]

    ds_tr, ds_te = ds.split([4, 1])

    ds_tr = ds_tr.view(batch_size=80, shuffle=True)
    ds_te = ds_te.view(batch_size=20, shuffle=True)

    # layer
    layer = esp.nn.layers.dgl_legacy.gn(args.layer)

    # representation
    representation = esp.nn.Sequential(layer, config=args.config)

    # the readout width is the last numeric entry of the config; entries
    # may arrive as ints or as digit strings
    units = [
        int(x) for x in args.config
        if isinstance(x, int) or (isinstance(x, str) and x.isdigit())
    ][-1]

    # normalize the janossy config: digit strings become ints, everything
    # else (e.g. activation names) is passed through untouched
    janossy_config = [
        int(x) if isinstance(x, int) or x.isdigit() else x
        for x in args.janossy_config
    ]

    readout = esp.nn.readout.janossy.JanossyPooling(
        in_features=units,
        config=janossy_config,
        out_features={
            1: {
                'sigma': 1,
                'epsilon': 1
            },
            2: {
                'k': 1,
                'eq': 1
            },
            3: {
                'k': 1,
                'eq': 1
            },
            4: {
                'k': 6
            },
        },
    )

    global_readout = esp.nn.readout.graph_level_readout.GraphLevelReadout(
        units,
        [units, args.graph_act],
        [units, args.graph_act, 1],
        'u0',
    )

    net = torch.nn.Sequential(
        representation,
        readout,
        global_readout,
        esp.mm.geometry.GeometryInGraph(),
        esp.mm.energy.EnergyInGraph(),
    )

    # training: MSE on energies plus a weighted derivative term
    metrics_tr = [
        esp.metrics.GraphMetric(
            base_metric=torch.nn.MSELoss(),
            between=['u', "u_ref"],
            level="g",
        ),
        esp.metrics.GraphHalfDerivativeMetric(
            base_metric=torch.nn.MSELoss(),
            weight=args.weight,
        ),
    ]

    # test: R2 and RMSE for both the energies and the derivative term
    metrics_te = [
        esp.metrics.GraphMetric(
            base_metric=esp.metrics.r2,
            between=['u', 'u_ref'],
            level="g",
        ),
        esp.metrics.GraphMetric(
            base_metric=esp.metrics.rmse,
            between=['u', 'u_ref'],
            level="g",
        ),
        esp.metrics.GraphHalfDerivativeMetric(
            base_metric=esp.metrics.r2,
            weight=1.0,
        ),
        esp.metrics.GraphHalfDerivativeMetric(
            base_metric=esp.metrics.rmse,
            weight=1.0,
        ),
    ]

    exp = esp.TrainAndTest(
        ds_tr=ds_tr,
        ds_te=ds_te,
        net=net,
        metrics_tr=metrics_tr,
        metrics_te=metrics_te,
        n_epochs=args.n_epochs,
        record_interval=1000,
        normalize=esp.data.normalize.PositiveNotNormalize,
        optimizer=lambda net: torch.optim.Adam(net.parameters(), args.lr),
        device=torch.device('cuda:0'),
    )

    results = exp.run()

    print(esp.app.report.markdown(results))

    # ---- persist everything under args.out ----
    os.mkdir(args.out)

    torch.save(net.state_dict(), args.out + "/net.th")

    with open(args.out + "/architecture.txt", "w") as f_handle:
        f_handle.write(str(exp))

    with open(args.out + "/result_table.md", "w") as f_handle:
        f_handle.write(esp.app.report.markdown(results))

    curves = esp.app.report.curve(results)

    # one .npy file per curve; the file name joins the parts of the key
    for spec, curve in curves.items():
        np.save(args.out + "/" + "_".join(spec) + ".npy", curve)

    # The reference graphs are written once; a copy-pasted tail used to
    # re-print the report and rewrite these same two files.
    with open(args.out + "/ref_g_test.th", "wb") as f_handle:
        pickle.dump(exp.ref_g_test, f_handle)

    with open(args.out + "/ref_g_training.th", "wb") as f_handle:
        pickle.dump(exp.ref_g_training, f_handle)
Ejemplo n.º 6
0
def run(args):
    """Train a model to reproduce legacy typing at the n1, n2 and n3 levels.

    The dataset named by ``args.data`` is transformed in place by the
    ``multi_typing`` method of the legacy force field ``args.forcefield``,
    split according to ``args.partition`` and batched. A graph network with
    a Janossy readout emitting a 100-wide ``nn_typing`` output for levels
    1-3 is trained with cross-entropy against ``legacy_typing`` and tested
    with ``esp.metrics.accuracy``. The markdown report is printed.

    Parameters
    ----------
    args : namespace-like
        Attributes read: ``data``, ``first``, ``forcefield``, ``partition``
        (colon-separated integers, e.g. ``"4:1"``), ``batch_size``,
        ``layer``, ``config``, ``janossy_config``, ``n_epochs``.
    """
    # dataset: look the constructor up by name on esp.data
    dataset_factory = getattr(esp.data, args.data)
    dataset = dataset_factory(first=args.first)

    # typing step with the requested legacy force field
    force_field = esp.graphs.legacy_force_field.LegacyForceField(
        args.forcefield)
    dataset = dataset.apply(force_field.multi_typing, in_place=True)

    # split, e.g. "4:1" -> [4, 1]
    ratios = list(map(int, args.partition.split(":")))
    train_part, test_part = dataset.split(ratios)

    # batch
    train_view = train_part.view("graph", batch_size=args.batch_size)
    test_view = test_part.view("graph", batch_size=args.batch_size)

    # representation
    graph_layer = esp.nn.layers.dgl_legacy.gn(args.layer)
    representation = esp.nn.Sequential(graph_layer, config=args.config)

    # the readout width is the last integer entry of the config
    int_entries = [entry for entry in args.config if isinstance(entry, int)]
    units = int_entries[-1]

    # the same 100-wide "nn_typing" output is requested for levels 1, 2, 3
    typing_outputs = {order: {"nn_typing": 100} for order in (1, 2, 3)}
    readout = esp.nn.readout.janossy.JanossyPooling(
        in_features=units,
        config=args.janossy_config,
        out_features=typing_outputs,
    )

    model = torch.nn.Sequential(representation, readout)

    # one training and one test metric per level, in the order n1, n2, n3
    metrics_tr = []
    for term in ["n1", "n2", "n3"]:
        metrics_tr.append(
            esp.metrics.GraphMetric(
                base_metric=torch.nn.CrossEntropyLoss(),
                between=["nn_typing", "legacy_typing"],
                level=term,
            ))

    metrics_te = []
    for term in ["n1", "n2", "n3"]:
        metrics_te.append(
            esp.metrics.GraphMetric(
                base_metric=esp.metrics.accuracy,
                between=["nn_typing", "legacy_typing"],
                level=term,
            ))

    exp = esp.TrainAndTest(
        ds_tr=train_view,
        ds_te=test_view,
        net=model,
        metrics_tr=metrics_tr,
        metrics_te=metrics_te,
        n_epochs=args.n_epochs,
    )
    results = exp.run()

    print(esp.app.report.markdown(results))
Ejemplo n.º 7
0
def run(args):
    """Train an energy-matching model on vacuum-MD snapshots and save it.

    The dataset named by ``args.data`` is parametrized in place with the
    legacy force field ``args.forcefield``, then passed through
    ``MoleculeVacuumSimulation(n_samples=100, n_steps_per_sample=10).run``.
    After splitting and batching, a graph network (representation, Janossy
    readout, geometry layer, and energy layers for both the predicted and
    the ``_ref`` parameters) is trained through ``esp.TrainAndTest`` on
    ``cuda:0``.

    Parameters
    ----------
    args : namespace-like
        Attributes read: ``data``, ``first``, ``forcefield``, ``partition``
        (colon-separated integers, e.g. ``"4:1"``), ``batch_size``,
        ``layer``, ``config``, ``janossy_config``, ``n_epochs``, ``out``.
        Entries of ``config`` / ``janossy_config`` may be ints or strings;
        digit strings are converted to ints.

    Side effects
    ------------
    Prints the raw and parsed janossy configs and the markdown report, and
    creates the directory ``args.out`` (``os.mkdir`` raises
    ``FileExistsError`` if it already exists) holding ``architecture.txt``,
    ``result_table.md``, one ``.npy`` file per curve and the pickled
    reference graphs.
    """
    import os
    import pickle

    # define data
    data = getattr(esp.data, args.data)(first=args.first)

    # get force field
    forcefield = esp.graphs.legacy_force_field.LegacyForceField(
        args.forcefield)

    # param / typing
    operation = forcefield.parametrize

    # apply to dataset
    data = data.apply(operation, in_place=True)

    # make the simulation and apply it to every entry
    from espaloma.data.md import MoleculeVacuumSimulation
    simulation = MoleculeVacuumSimulation(
        n_samples=100,
        n_steps_per_sample=10,
    )

    data = data.apply(simulation.run, in_place=True)

    # split, e.g. "4:1" -> [4, 1]
    partition = [int(x) for x in args.partition.split(":")]
    ds_tr, ds_te = data.split(partition)

    # batch
    ds_tr = ds_tr.view("graph", batch_size=args.batch_size)
    ds_te = ds_te.view("graph", batch_size=args.batch_size)

    # layer
    layer = esp.nn.layers.dgl_legacy.gn(args.layer)

    # representation
    representation = esp.nn.Sequential(layer, config=args.config)

    # The readout width is the last numeric entry of the config. Accept
    # both ints and digit strings: the previous ``x.isdigit()``-only filter
    # raised AttributeError on int entries, although the janossy_config
    # parsing just below already tolerated them.
    units = [
        int(x) for x in args.config
        if isinstance(x, int) or (isinstance(x, str) and x.isdigit())
    ][-1]

    print(args.janossy_config)

    # digit strings become ints, everything else (e.g. activation names)
    # is passed through untouched
    janossy_config = [
        int(x) if isinstance(x, int) or x.isdigit() else x
        for x in args.janossy_config
    ]

    print(janossy_config)

    readout = esp.nn.readout.janossy.JanossyPooling(
        in_features=units,
        config=janossy_config,
    )

    net = torch.nn.Sequential(
        representation,
        readout,
        esp.mm.geometry.GeometryInGraph(),
        esp.mm.energy.EnergyInGraph(),
        esp.mm.energy.EnergyInGraph(suffix='_ref'),
    )

    # training: MSE on energies plus a weighted derivative term
    metrics_tr = [
        esp.metrics.GraphMetric(
            base_metric=torch.nn.MSELoss(),
            between=['u', "u_ref"],
            level="g",
        ),
        esp.metrics.GraphDerivativeMetric(
            base_metric=torch.nn.MSELoss(),
            between=["u", "u_ref"],
            level="g",
            weight=10.0,
        ),
    ]

    # test: R2 and RMSE on energies
    metrics_te = [
        esp.metrics.GraphMetric(
            base_metric=esp.metrics.r2,
            between=['u', 'u_ref'],
            level="g",
        ),
        esp.metrics.GraphMetric(
            base_metric=esp.metrics.rmse,
            between=['u', 'u_ref'],
            level="g",
        ),
    ]

    exp = esp.TrainAndTest(
        ds_tr=ds_tr,
        ds_te=ds_te,
        net=net,
        metrics_tr=metrics_tr,
        metrics_te=metrics_te,
        n_epochs=args.n_epochs,
        normalize=esp.data.normalize.NotNormalize,
        optimizer=lambda net: torch.optim.Adam(net.parameters(), 1e-3),
        device=torch.device('cuda:0'),
    )

    results = exp.run()

    print(esp.app.report.markdown(results))

    # ---- persist everything under args.out ----
    os.mkdir(args.out)

    with open(args.out + "/architecture.txt", "w") as f_handle:
        f_handle.write(str(exp))

    with open(args.out + "/result_table.md", "w") as f_handle:
        f_handle.write(esp.app.report.markdown(results))

    curves = esp.app.report.curve(results)

    # one .npy file per curve; the file name joins the parts of the key
    for spec, curve in curves.items():
        np.save(args.out + "/" + "_".join(spec) + ".npy", curve)

    # The reference graphs are written once; a copy-pasted tail used to
    # re-print the report and rewrite these same two files.
    with open(args.out + "/ref_g_test.th", "wb") as f_handle:
        pickle.dump(exp.ref_g_test, f_handle)

    with open(args.out + "/ref_g_training.th", "wb") as f_handle:
        pickle.dump(exp.ref_g_training, f_handle)
Ejemplo n.º 8
0
def run(args):
    """Fit a free-parameter baseline to one simulated molecule and save it.

    One entry of the dataset named by ``args.data`` (``first=1``) is
    parametrized in place with the legacy force field ``args.forcefield``
    and passed through
    ``MoleculeVacuumSimulation(n_samples=1000, n_steps_per_sample=10).run``.
    The same view is used for both training and testing. The model is
    ``esp.nn.baselines.FreeParameterBaseline`` followed by geometry and
    energy layers, optimized with L-BFGS through ``esp.TrainAndTest``.

    Parameters
    ----------
    args : namespace-like
        Attributes read: ``data``, ``forcefield``, ``n_epochs``, ``out``.

    Raises
    ------
    ValueError
        If the dataset view yields no graph to use as the reference.

    Side effects
    ------------
    Prints the markdown report and creates the directory ``args.out``
    (``os.mkdir`` raises ``FileExistsError`` if it already exists) holding
    ``architecture.txt``, ``result_table.md``, one ``.npy`` file per curve
    and the pickled reference graphs.
    """
    import os
    import pickle

    # define data
    data = getattr(esp.data, args.data)(first=1)

    # get force field
    forcefield = esp.graphs.legacy_force_field.LegacyForceField(
        args.forcefield)

    # param / typing
    operation = forcefield.parametrize

    # apply to dataset
    data = data.apply(operation, in_place=True)

    # make the simulation and apply it to every entry
    from espaloma.data.md import MoleculeVacuumSimulation
    simulation = MoleculeVacuumSimulation(n_samples=1000,
                                          n_steps_per_sample=10)

    data = data.apply(simulation.run, in_place=True)

    # only one bit of data: train and test on the same view
    ds = data.view("graph", batch_size=1)
    ds_te = ds_tr = ds

    # Exhaust the view to obtain the (last) batched graph it yields. An
    # empty view used to leave ``g`` unbound; fail with a clear message.
    g = None
    for g in ds:
        pass
    if g is None:
        raise ValueError(
            "dataset view is empty; cannot build a reference graph")

    # representation
    representation = esp.nn.baselines.FreeParameterBaseline(g_ref=g)

    net = torch.nn.Sequential(
        representation,
        esp.mm.geometry.GeometryInGraph(),
        esp.mm.energy.EnergyInGraph(),
        esp.mm.energy.EnergyInGraph(suffix='_ref'),
    )

    # L-BFGS over all parameters; the second positional argument is the
    # learning rate
    optimizer = torch.optim.LBFGS(net.parameters(),
                                  0.01,
                                  line_search_fn='strong_wolfe')

    # training: MSE between predicted and reference energies
    metrics_tr = [
        esp.metrics.GraphMetric(base_metric=torch.nn.MSELoss(),
                                between=['u', 'u_ref'],
                                level='g')
    ]

    # test: RMSE and R2 for the same quantity
    metrics_te = [
        esp.metrics.GraphMetric(base_metric=base_metric,
                                between=['u', 'u_ref'],
                                level='g')
        for base_metric in [esp.metrics.rmse, esp.metrics.r2]
    ]

    exp = esp.TrainAndTest(
        ds_tr=ds_tr,
        ds_te=ds_te,
        net=net,
        metrics_tr=metrics_tr,
        metrics_te=metrics_te,
        n_epochs=args.n_epochs,
        normalize=esp.data.normalize.PositiveNotNormalize,
        optimizer=optimizer,
    )

    results = exp.run()

    print(esp.app.report.markdown(results))

    # ---- persist everything under args.out ----
    os.mkdir(args.out)

    with open(args.out + "/architecture.txt", "w") as f_handle:
        f_handle.write(str(exp))

    with open(args.out + "/result_table.md", "w") as f_handle:
        f_handle.write(esp.app.report.markdown(results))

    curves = esp.app.report.curve(results)

    # one .npy file per curve; the file name joins the parts of the key
    for spec, curve in curves.items():
        np.save(args.out + "/" + "_".join(spec) + ".npy", curve)

    # The reference graphs are written once; a copy-pasted tail used to
    # re-print the report and rewrite these same two files.
    with open(args.out + "/ref_g_test.th", "wb") as f_handle:
        pickle.dump(exp.ref_g_test, f_handle)

    with open(args.out + "/ref_g_training.th", "wb") as f_handle:
        pickle.dump(exp.ref_g_training, f_handle)