Example 1
def test_forward():
    # load paddle model
    paddle_model = mv3_small_paddle()
    paddle_model.eval()
    paddle_state_dict = paddle.load("./data/mv3_small_paddle.pdparams")
    paddle_model.set_dict(paddle_state_dict)

    # load torch model
    torch_model = mv3_small_torch()
    torch_model.eval()
    torch_state_dict = torch.load("./data/mobilenet_v3_small-047dcff4.pth")
    torch_model.load_state_dict(torch_state_dict)

    # prepare logger & load data
    reprod_logger = ReprodLogger()
    paddle_dataset, paddle_dataloader = build_paddle_data_pipeline()
    torch_dataset, torch_dataloader = build_torch_data_pipeline()
    # compare only the first batch from each framework's dataloader
    for idx, (paddle_batch, torch_batch) in enumerate(
            zip(paddle_dataloader, torch_dataloader)):
        if idx > 0:
            break
        evaluate(paddle_batch[0], paddle_batch[1], paddle_model,
                 accuracy_paddle, 'paddle', reprod_logger)
        evaluate(torch_batch[0], torch_batch[1], torch_model, accuracy_torch,
                 'ref', reprod_logger)
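The evaluate helper is not part of this snippet; below is a minimal sketch of what it plausibly does with reprod_log (the metric key, the accuracy-function signature and the output path are illustrative assumptions, not the original code):

import numpy as np

def evaluate(images, labels, model, acc_fn, tag, reprod_logger):
    # forward one batch and score it with the framework-specific metric
    outputs = model(images)
    acc = acc_fn(outputs, labels)
    # log under a shared key so reprod_log's ReprodDiffHelper can later
    # diff the 'paddle' run against the 'ref' (torch) run
    reprod_logger.add("acc", np.array([float(acc)]))
    reprod_logger.save("forward_{}.npy".format(tag))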
Example 2
def do_symmetric_surface(test_dir, in_plane_supercell=[1,1], pert_pos=0.0):
    assert len(in_plane_supercell) == 2

    surf = ase.io.read(test_dir+"/surface.xyz", format="extxyz")
    surf *= list(in_plane_supercell) + [1]

    if pert_pos > 0.0:
        surf.rattle(pert_pos)

    bulk = rescale_to_relaxed_bulk(surf)
    bulk_Zs = bulk.get_atomic_numbers()
    evaluate(bulk)
    bulk_cell = bulk.get_cell()
    bulk_E = bulk.get_potential_energy()

    try:
        model.reset_config()
    except AttributeError:
        pass

    print("got relaxed bulk cell ", bulk_cell)
    print("got rescaled surf cell ", surf.get_cell())

    # relax surface system
    tol = 1.0e-2
    surf = relax_config(surf, relax_pos=True, relax_cell=False, tol=tol, save_traj=True,
                        config_label="surface", from_base_model=True, save_config=True, try_restart=True)

    ase.io.write(os.path.join("..",run_root+"-relaxed.xyz"),  surf, format='extxyz')

    # check stoichiometry and number of bulk cell energies to subtract
    surf_Zs = surf.get_atomic_numbers()
    Z0 = bulk_Zs[0]
    n_bulk_cells = float(sum(surf_Zs == Z0))/float(sum(bulk_Zs == Z0))
    if len(set(bulk_Zs)) == 1:
        n_dmu = None
    else:
        n_dmu = {}
        for Z in set(bulk_Zs):
            # make sure types are JSON compatible
            n_dmu[int(Z)] = float(n_bulk_cells*sum(bulk_Zs == Z) - sum(surf_Zs == Z))

    # calculate surface energy
    area = np.linalg.norm(np.cross(surf.get_cell()[0,:],surf.get_cell()[1,:]))

    print("got surface cell potential energy", surf.get_potential_energy())
    print("got bulk potential energy",bulk_E*n_bulk_cells)
    print("got area",area)

    return { "bulk_struct_test" : surf.info["bulk_struct_test"],
             "Ef" : (surf.get_potential_energy() - bulk_E*n_bulk_cells)/(2.0*area),
             "dmu" : n_dmu, 'filename' : run_root+"-relaxed.xyz" }
Example 3
def train(event, context):
    """Trains our ML model."""
    utilities.download_directory(uri=f"s3://{DATA_BUCKET}/{DATA_PREFIX}",
                                 dst=DATA_TMP_DST)
    df = utilities.read_csv_directory(DATA_TMP_DST)
    print(f"SHAPE: {df.shape}")

    train_df, test_df = utilities.train_test_split(df)  # avoid shadowing train()
    X_train, y_train = utilities.Xy_split(train_df, target='y')
    X_test, y_test = utilities.Xy_split(test_df, target='y')

    X_train = utilities.preprocessing.preprocess(X_train)
    X_test = utilities.preprocessing.preprocess(X_test)

    model = utilities.Model()
    model.fit(X_train, y_train)

    y_hat = model.predict(X_test)
    eval_results = utilities.evaluate(y_actual=y_test, y_predict=y_hat)

    utilities.save_model(obj=model, uri=f"s3://{DATA_BUCKET}/{MODEL_PREFIX}")
    return {
        "status": "success",
        "results": eval_results,
    }
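A hypothetical local smoke test for this handler; DATA_BUCKET, DATA_PREFIX and the other upper-case names are assumed to be module-level configuration, so only the Lambda-style arguments are stubbed:

if __name__ == "__main__":
    # Lambda passes an event dict and a context object; neither is used here
    response = train(event={}, context=None)
    print(response["status"])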
Example 4
def alphaBeta(board, currentDepth, maxDepth, emptySpaces, isXsTurn, alpha,
              beta):
    """Minimax with alpha-beta pruning; returns (score, move, nodes visited)."""
    evaluation = evaluate(board)
    if currentDepth == maxDepth or not len(emptySpaces) or abs(
            evaluation) == abs(X_WIN):
        return evaluation, (), 0

    optimalMove = ()
    searches = 0
    optimalScore = -INF if isXsTurn else INF
    for space in emptySpaces:
        nextState, _ = move(board, space[0], space[1], X if isXsTurn else O)

        nextScore, _, subtreeSearches = alphaBeta(nextState, currentDepth + 1,
                                                  maxDepth,
                                                  findEmptySpaces(nextState),
                                                  not isXsTurn, alpha, beta)

        searches += 1 + subtreeSearches

        if isXsTurn and optimalScore < nextScore:
            optimalScore = nextScore
            optimalMove = space
            alpha = max(alpha, optimalScore)
        elif not isXsTurn and optimalScore > nextScore:
            optimalScore = nextScore
            optimalMove = space
            beta = min(beta, optimalScore)

        if beta <= alpha:
            break

    return optimalScore, optimalMove, searches
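A typical top-level call, as a sketch (maxDepth=9 assumes a 3x3 tic-tac-toe board; the window starts unbounded):

score, best_move, visited = alphaBeta(board, 0, 9, findEmptySpaces(board),
                                      True, -INF, INF)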
Example 5
def minimax(board, currentDepth, maxDepth, emptySpaces, isXsTurn):
    """Plain minimax without pruning; returns (score, move, nodes visited)."""
    evaluation = evaluate(board)
    if currentDepth == maxDepth or not len(emptySpaces) or abs(
            evaluation) == abs(X_WIN):
        return evaluation, (), 0

    optimalMove = ()
    searches = 0

    optimalScore = -INF if isXsTurn else INF
    for space in emptySpaces:
        nextState, _ = move(board, space[0], space[1], X if isXsTurn else O)

        nextScore, _, subtreeSearches = minimax(nextState, currentDepth + 1,
                                                maxDepth,
                                                findEmptySpaces(nextState),
                                                not isXsTurn)

        searches += 1 + subtreeSearches

        if isXsTurn and optimalScore < nextScore:
            optimalScore = nextScore
            optimalMove = space
        elif not isXsTurn and optimalScore > nextScore:
            optimalScore = nextScore
            optimalMove = space
    """ Debug string:
        print(
            "Depth: {}, Empty Spaces: {}, X's turn: {}, Score: {}, Move: {}, Searches: {}"
            .format(currentDepth, len(emptySpaces), isXsTurn, optimalScore, optimalMove, searches)
        )
    """

    return optimalScore, optimalMove, searches
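Pruning never changes the minimax value, only the amount of work, so a quick consistency check between the two searchers looks like this (a sketch; with identical move ordering alpha-beta visits at most as many nodes):

mm_score, _, mm_nodes = minimax(board, 0, 9, findEmptySpaces(board), True)
ab_score, _, ab_nodes = alphaBeta(board, 0, 9, findEmptySpaces(board), True,
                                  -INF, INF)
assert mm_score == ab_score and ab_nodes <= mm_nodes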
Example 6
def next_stump(dataset, stumps):
    # Initialize helper variables.
    cols = dataset.columns.values  # pandas removed .get_values(); .values is the replacement
    y = dataset.iloc[:, 4].values
    stump = {}
    rim = []

    # ln 1: Special case, average all y(i) values for each x(i).
    #       The feature to use as root is chosen based off max change
    #       in variance.
    if len(stumps) == 0:
        # Overwrite stump value with average.
        init_subset = dataset.copy()  # copy so the caller's frame isn't mutated
        init_subset['PR'] = np.average(y)
        stump = stump_from_dataset(init_subset)

    # Else build off of previous stump using gradient boosting.
    else:
        # ln 2(a): Find r(i)(m) for x(i) in data columns.
        for index, data in dataset.iterrows():
            rim.append(data['PE'] - utilities.evaluate(stumps, data))

        # ln 2(b): Fit a regression tree to targets r.
        rim_subset = dataset.copy()  # copy so the caller's frame isn't mutated
        rim_subset['PR'] = rim
        stump = stump_from_dataset(rim_subset)

    stumps.append(stump)
    print(stump)
    print(utilities.mse(dataset, stumps))

    return stumps
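utilities.evaluate is external to this snippet; judging from the stump keys used elsewhere ('attribute', 'value', 'left', 'right'), a plausible sketch is the usual boosted sum of one-split predictions:

def evaluate(stumps, row):
    # hypothetical: each stump routes the row left or right at its threshold,
    # and the ensemble prediction is the sum of the per-stump outputs
    total = 0.0
    for s in stumps:
        total += s['left'] if row[s['attribute']] <= s['value'] else s['right']
    return total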
Example 7
def do_one_vacancy(bulk_supercell, bulk_supercell_pe, vac_i, relax_radial=0.0, relax_symm_break=0.0, nn_cutoff=0.0, tol=1.0e-2):

    # do unrelaxed (without perturbations)
    vac = bulk_supercell.copy()
    del vac[vac_i]

    label = "ind_%d_Z_%d" % (vac_i, bulk_supercell.get_atomic_numbers()[vac_i])
    unrelaxed_filename=run_root+"-%s-unrelaxed.xyz" % label
    ase.io.write(os.path.join("..",unrelaxed_filename), vac, format='extxyz')
    evaluate(vac)
    unrelaxed_vac_pe = vac.get_potential_energy()

    # recreate with perturbations for relaxation
    vac = bulk_supercell.copy()

    if relax_radial != 0.0 or relax_symm_break != 0.0:
        nl = NeighborList([nn_cutoff/2.0]*len(bulk_supercell), self_interaction=False, bothways=True)
        nl.update(bulk_supercell)
        indices, offsets = nl.get_neighbors(vac_i)
        offset_factor = relax_radial
        for i, offset in zip(indices, offsets):
            ri = vac.positions[vac_i] - (vac.positions[i] + np.dot(offset, vac.get_cell()))
            vac.positions[i] += offset_factor*ri
            offset_factor += relax_symm_break

    # record the vacancy position before the atom is deleted, not after
    vac_pos = vac.positions[vac_i].copy()
    del vac[vac_i]

    vac = relax_config(vac, relax_pos=True, relax_cell=False, tol=tol, save_traj=True,
        config_label=label, from_base_model=True, save_config=True, try_restart=True)
    relaxed_filename=run_root+"-%s-relaxed.xyz" % label
    ase.io.write(os.path.join("..",relaxed_filename), vac, format='extxyz')

    # already has calculator from relax_config
    vac_pe = vac.get_potential_energy()
    if len(set(bulk_supercell.get_atomic_numbers())) == 1:
        Ebulk = float(len(vac))/float(len(bulk_supercell)) * bulk_supercell_pe
    else:
        Ebulk = bulk_supercell_pe
    Ef0 = unrelaxed_vac_pe - Ebulk
    Ef = vac_pe - Ebulk
    print("got vacancy",label,"cell energy",vac_pe,"n_atoms",len(vac))
    print("got bulk energy", Ebulk," (scaled to (N-1)/N if single component)")
    return ( label, unrelaxed_filename, Ef0, relaxed_filename, Ef, int(bulk_supercell.get_atomic_numbers()[vac_i]), vac_pos )
Example 8
def do_one_antisite_pair(bulk_supercell,
                         bulk_supercell_pe,
                         i1,
                         i2,
                         tol=1.0e-2):
    assert bulk_supercell.numbers[i1] != bulk_supercell.numbers[i2]

    # do unrelaxed (without perturbations)
    antisite = bulk_supercell.copy()
    Z1, Z2 = antisite.numbers[[i1, i2]]
    antisite.numbers[i1] = Z2
    antisite.numbers[i2] = Z1

    label = "ind_%d_Z_%d_ind_%d_Z_%d" % (i1, Z1, i2, Z2)
    unrelaxed_filename = run_root + "-%s-unrelaxed.xyz" % label
    ase.io.write(os.path.join("..", unrelaxed_filename),
                 antisite,
                 format='extxyz')
    evaluate(antisite)
    unrelaxed_antisite_pe = antisite.get_potential_energy()

    antisite = relax_config(antisite,
                            relax_pos=True,
                            relax_cell=False,
                            tol=tol,
                            save_traj=True,
                            config_label=label,
                            from_base_model=True,
                            save_config=True,
                            try_restart=True)
    relaxed_filename = run_root + "-%s-relaxed.xyz" % label
    ase.io.write(os.path.join("..", relaxed_filename),
                 antisite,
                 format='extxyz')

    # already has calculator from relax_config
    antisite_pe = antisite.get_potential_energy()
    Ef0 = unrelaxed_antisite_pe - bulk_supercell_pe
    Ef = antisite_pe - bulk_supercell_pe
    print("got antisite", label, "cell energy", antisite_pe, "n_atoms",
          len(antisite))
    print("got bulk energy", bulk_supercell_pe)
    return (label, unrelaxed_filename, Ef0, relaxed_filename, Ef, Z1, Z2)
Example 9
losses = []
stumps = []  # snippet assumes an initially empty ensemble
m = 150

for i in range(1, m + 1):
    print("Step %d of %d\n" % (i, m))
    stumps = next_stump(train_dataset, stumps)
    losses.append(utilities.mse(train_dataset, stumps))
    # Save trees to a file.
    with open("trees_%d.txt" % m, "a+") as f:
        f.write("%s:%f:%f:%f:%f\n" %
                (stumps[-1]['attribute'], stumps[-1]['value'], stumps[-1]['left'],
                 stumps[-1]['right'], losses[-1]))

    # Check stopping condition(s).
    if stumps[-1]['gradient'] == math.inf:
        m = i
        break

utilities.plot_loss(m, losses, train_dataset,
                    'Mean Squared Error; lambda = 0.50', 'cornflowerblue')

test_losses = []
y_test = test_dataset.iloc[:, 4].values.tolist()
for index, data in test_dataset.iterrows():
    test_losses.append((data['PE'] - utilities.evaluate(stumps, data))**2)

utilities.plot_test(test_losses, y_test, 'Non-Library Loss for Test Dataset',
                    'lightseagreen')
utilities.stats(test_dataset, stumps, 'Gradient Boosting Tree Algorithm')
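The rows written to trees_<m>.txt are colon-separated; a hypothetical reader that reconstructs the stump list (the trailing loss column is ignored):

def load_stumps(path):
    stumps = []
    with open(path) as fh:
        for line in fh:
            attribute, value, left, right, _loss = line.strip().split(":")
            stumps.append({'attribute': attribute, 'value': float(value),
                           'left': float(left), 'right': float(right)})
    return stumps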
Example 10
def do_all_interstitials(test_dir, nn_cutoff=0.0, tol=1.0e-2):
    print("doing do_all_interstitials")
    bulk_supercell = ase.io.read(os.path.join(test_dir, "bulk_supercell.xyz"),
                                 format="extxyz")
    print("got bulk_supercell ", len(bulk_supercell))

    bulk = rescale_to_relaxed_bulk(bulk_supercell)
    # relax bulk supercell positions in case it's only approximate (as it must be for different models), but stick
    # to relaxed bulk's lattice constants as set by rescale_to_relaxed_bulk
    bulk_supercell = relax_config(bulk_supercell,
                                  relax_pos=True,
                                  relax_cell=False,
                                  tol=tol,
                                  save_traj=True,
                                  config_label="relaxed_bulk",
                                  from_base_model=True,
                                  save_config=True)

    ase.io.write(os.path.join("..", run_root + "-rescaled-bulk.xyz"),
                 bulk_supercell,
                 format='extxyz')

    print("got bulk primitive cell ", bulk.get_cell())
    print("got rescaled bulk_supercell cell ", bulk_supercell.get_cell())

    try:  # Cartesian 3-vector
        interstitial_pos_l = [
            np.array([float(x) for x in bulk_supercell.info["interstitials"]])
        ]
        if len(interstitial_pos_l[0]) != 3:
            raise ValueError("not a 3-vector")
    except (TypeError, ValueError):
        interstitial_pos_type = bulk_supercell.info["interstitials"].split()[0]
        if interstitial_pos_type == "mean":
            neighbor_indices = [
                int(i)
                for i in bulk_supercell.info["interstitials"].split()[1:]
            ]
            if len(neighbor_indices) < 2:
                raise ValueError(
                    "interstitial position type mean, but {} < 2 indices".
                    format(len(neighbor_indices)))
            interstitial_pos_l = [
                np.sum(bulk_supercell.get_positions()[neighbor_indices],
                       axis=0) / float(len(neighbor_indices))
            ]
        elif interstitial_pos_type == "inequivalent":
            if 'arb_supercell' in bulk_supercell.info:
                print("making bulk supercell from",
                      bulk_supercell.info['arb_supercell'].reshape((3, 3)))
                bulk_supersupercell = ase.build.make_supercell(
                    bulk_supercell,
                    bulk_supercell.info['arb_supercell'].reshape((3, 3)))
                print("got supersupercell with ", len(bulk_supersupercell),
                      "atoms, cell\n", bulk_supersupercell.get_cell())
            voids = find_voids(bulk_supercell)
            interstitial_pos_l = [(v[1], v[2], v[3]) for v in voids]

            if 'arb_supercell' in bulk_supercell.info:
                bulk_supersupercell.info.update(bulk_supercell.info)
                bulk_supercell = bulk_supersupercell
        else:
            raise ValueError("Unknown interstitial position type in '" +
                             bulk_supercell.info["interstitials"] + "'")

    evaluate(bulk_supercell)
    bulk_supercell_pe = bulk_supercell.get_potential_energy()
    properties = {
        "bulk_struct_test": bulk_supercell.info["bulk_struct_test"],
        "bulk_E_per_atom": bulk_supercell_pe / len(bulk_supercell),
        "defects": {}
    }

    Z_list = ([bulk_supercell.info["Zs"]] if isinstance(
        bulk_supercell.info["Zs"],
        (int, np.integer)) else bulk_supercell.info["Zs"])

    for interstitial_Z in Z_list:
        try:
            relax_radial = bulk_supercell.info['relax_radial_{}'.format(
                interstitial_Z)]
        except KeyError:
            relax_radial = 0.0
        try:
            relax_symm_break = bulk_supercell.info[
                'relax_symm_break_{}'.format(interstitial_Z)]
        except KeyError:
            relax_symm_break = 0.0

        for interst_i, interst_pos in enumerate(interstitial_pos_l):
            label = f'Z_{interstitial_Z}_pos_{interst_i}'
            (unrelaxed_filename, Ef0,
             relaxed_filename, Ef, interstitial_i) = do_one_interstitial(
                 bulk_supercell, bulk_supercell_pe, interstitial_Z,
                 interst_pos, label, relax_radial, relax_symm_break, nn_cutoff,
                 tol)

            properties["defects"][label] = {
                'Ef0': Ef0,
                'Ef': Ef,
                'unrelaxed_filename': unrelaxed_filename,
                'relaxed_filename': relaxed_filename,
                'atom_ind': int(interstitial_i),
                'Z': int(interstitial_Z),
                'pos_index': interst_i
            }
            if len(set(bulk_supercell.get_atomic_numbers())) != 1:
                properties["defects"][label]['dmu'] = [-1, int(interstitial_Z)]

    return properties
Example 11
def do_farthest_inequiv_pairs(test_dir, tol=1.0e-2):
    print("doing do_farthest_antisite_pairs")
    bulk_supercell = ase.io.read(os.path.join(test_dir, "bulk_supercell.xyz"),
                                 format="extxyz")
    print("got bulk_supercell ", len(bulk_supercell))

    bulk = rescale_to_relaxed_bulk(bulk_supercell)
    # relax bulk supercell positions in case it's only approximate (as it must be for different models), but stick
    # to relaxed bulk's lattice constants as set by rescale_to_relaxed_bulk
    bulk_supercell = relax_config(bulk_supercell,
                                  relax_pos=True,
                                  relax_cell=False,
                                  tol=tol,
                                  save_traj=True,
                                  config_label="rescaled_bulk",
                                  from_base_model=True,
                                  save_config=True)

    ase.io.write(os.path.join("..", run_root + "-rescaled-bulk.xyz"),
                 bulk_supercell,
                 format='extxyz')

    print("got bulk primitive cell ", bulk.get_cell())
    print("got rescaled bulk_supercell cell ", bulk_supercell.get_cell())

    if 'arb_supercell' in bulk_supercell.info:
        print("making bulk supercell from",
              bulk_supercell.info['arb_supercell'].reshape((3, 3)))
        bulk_supersupercell = ase.build.make_supercell(
            bulk_supercell, bulk_supercell.info['arb_supercell'].reshape(
                (3, 3)))
        print("got supersupercell with ", len(bulk_supersupercell),
              "atoms, cell\n", bulk_supersupercell.get_cell())

        bulk_supersupercell.info.update(bulk_supercell.info)
        bulk_supercell = bulk_supersupercell

    sym_data = spglib.get_symmetry_dataset(bulk_supercell, symprec=0.01)
    equiv_at = set([
        tuple(iZ)
        for iZ in zip(sym_data["equivalent_atoms"], bulk_supercell.numbers)
    ])

    # print("equiv_at", equiv_at)

    antisite_list = []
    for i1, Z1 in equiv_at:
        for i2_proto, Z2 in equiv_at:
            if Z1 <= Z2:
                continue
            # print("check i", i1, i2_proto, "Z", Z1, Z2)

            i2s = np.where(sym_data["equivalent_atoms"] == i2_proto)[0]
            i2_dists = bulk_supercell.get_distances(i1, i2s, mic=True)
            farthest_ind = np.argmax(i2_dists)
            i2 = i2s[farthest_ind]

            antisite_list.append((i1, i2))

    # print("antisite_list", antisite_list) ##

    evaluate(bulk_supercell)
    bulk_supercell_pe = bulk_supercell.get_potential_energy()
    properties = {
        "bulk_struct_test": bulk_supercell.info["bulk_struct_test"],
        "bulk_E_per_atom": bulk_supercell_pe / len(bulk_supercell),
        "defects": {}
    }

    for i1, i2 in antisite_list:
        (label, unrelaxed_filename, Ef0, relaxed_filename, Ef, Z1,
         Z2) = do_one_antisite_pair(bulk_supercell, bulk_supercell_pe, i1, i2,
                                    tol)

        properties["defects"][label] = {
            'Ef0': Ef0,
            'Ef': Ef,
            'unrelaxed_filename': unrelaxed_filename,
            'relaxed_filename': relaxed_filename,
            'atom_inds': (int(i1), int(i2)),
            'Zs': (int(Z1), int(Z2))
        }

    print("returning properties", properties)
    return properties
Example 12
 def test_evaluate(self):
     np.random.seed(123)
     truth = np.random.poisson(2.0, 100)
     prediction = np.ones(100) * 2.0
     plt.clf()
     utilities.evaluate(prediction, truth)
Example 13
def do_one_interstitial(bulk_supercell,
                        bulk_supercell_pe,
                        interstitial_Z,
                        interstitial_pos,
                        label,
                        relax_radial=0.0,
                        relax_symm_break=0.0,
                        nn_cutoff=0.0,
                        tol=1.0e-2):

    interst = bulk_supercell.copy()
    interst += Atoms(numbers=[interstitial_Z], positions=[interstitial_pos])
    interstitial_i = len(interst) - 1

    if relax_radial != 0.0 or relax_symm_break != 0.0:
        nl = NeighborList([nn_cutoff / 2.0] * len(bulk_supercell),
                          self_interaction=False,
                          bothways=True)
        nl.update(bulk_supercell)
        indices, offsets = nl.get_neighbors(interstitial_i)
        offset_factor = relax_radial
        for i, offset in zip(indices, offsets):
            ri = interst.positions[interstitial_i] - (
                interst.positions[i] + np.dot(offset, interst.get_cell()))
            interst.positions[i] += offset_factor * ri
            offset_factor += relax_symm_break

    if "interstitial_constraint" in bulk_supercell.info:
        (constr_type, constr_subtype
         ) = bulk_supercell.info["interstitial_constraint"].split()[0:2]

        if constr_type == "plane":
            if constr_subtype == "atoms":
                indices = [
                    int(i) for i in
                    bulk_supercell.info["interstitial_constraint"].split()[2:]
                ]
                if len(indices) != 3:
                    raise ValueError(
                        "number of indices not 3 for plane atoms '{}'".format(
                            bulk_supercell.info["interstitial_constraint"]))
                p = interst.get_positions()
                constr_normal = np.cross(p[indices[0]] - p[indices[1]],
                                         p[indices[0]] - p[indices[2]])
            elif constr_subtype == "vector":
                constr_normal = np.array(
                    bulk_supercell.info["interstitial_constraint"].split()[2:])
            else:
                raise ValueError(
                    "unknown interstitial constraint subtype for plane '{}'".
                    format(bulk_supercell.info["interstitial_constraint"]))

            print("setting constraint FixedPlane with normal", constr_normal)
            interst.set_constraint(FixedPlane(interstitial_i, constr_normal))
        else:
            raise ValueError(
                "unknown interstitial constraint type '{}'".format(
                    bulk_supercell.info["interstitial_constraint"]))

    evaluate(interst)
    unrelaxed_interstitial_pe = interst.get_potential_energy()

    if len(set(bulk_supercell.get_atomic_numbers())) == 1:
        Ebulk = float(len(interst)) / float(
            len(bulk_supercell)) * bulk_supercell_pe
    else:
        Ebulk = bulk_supercell_pe
    Ef0 = unrelaxed_interstitial_pe - Ebulk

    unrelaxed_filename = run_root + "-%s-unrelaxed.xyz" % label
    ase.io.write(os.path.join("..", unrelaxed_filename),
                 interst,
                 format='extxyz')

    print("got unrelaxed interstitial {} cell energy".format(label),
          unrelaxed_interstitial_pe)

    try:
        interst = relax_config(interst,
                               relax_pos=True,
                               relax_cell=False,
                               tol=tol,
                               save_traj=True,
                               config_label=label,
                               from_base_model=True,
                               save_config=True,
                               try_restart=True)

        relaxed_filename = run_root + "-%s-relaxed.xyz" % label
        ase.io.write(os.path.join("..", relaxed_filename),
                     interst,
                     format='extxyz')

        interstitial_pe = interst.get_potential_energy()
        Ef = interstitial_pe - Ebulk

        print("got relaxed interstitial {} cell energy".format(label),
              interstitial_pe)
    except Exception:
        relaxed_filename = None
        Ef = None

    print("got bulk energy", Ebulk)
    return (unrelaxed_filename, Ef0, relaxed_filename, Ef, interstitial_i)
Example 14
def do_interstitial(test_dir, nn_cutoff=0.0, tol=1.0e-2):
    print("doing do_interstitial")
    bulk_supercell = ase.io.read(os.path.join(test_dir, "bulk_supercell.xyz"),
                                 format="extxyz")
    print("got bulk_supercell ", len(bulk_supercell))

    bulk = rescale_to_relaxed_bulk(bulk_supercell)
    # relax bulk supercell positions in case it's only approximate (as it must be for different models), but stick
    # to relaxed bulk's lattice constants as set by rescale_to_relaxed_bulk
    bulk_supercell = relax_config(bulk_supercell,
                                  relax_pos=True,
                                  relax_cell=False,
                                  tol=tol,
                                  traj_file=None,
                                  config_label="relaxed_bulk",
                                  from_base_model=True,
                                  save_config=True)

    evaluate(bulk_supercell)
    bulk_supercell_pe = bulk_supercell.get_potential_energy()

    ase.io.write(os.path.join("..", run_root + "-rescaled-bulk.xyz"),
                 bulk_supercell,
                 format='extxyz')

    print("got bulk primitive cell ", bulk.get_cell())
    print("got rescaled bulk_supercell cell ", bulk_supercell.get_cell())

    properties = {
        "bulk_struct_test": bulk_supercell.info["bulk_struct_test"],
        "bulk_E_per_atom": bulk_supercell_pe / len(bulk_supercell),
        "defects": {}
    }

    try:  # Cartesian 3-vector
        interstitial_pos = np.array(
            [float(x) for x in bulk_supercell.info["interstitial_position"]])
        if len(interstitial_pos) != 3:
            raise ValueError("not a 3-vector")
    except (TypeError, ValueError):
        interstitial_pos_type = bulk_supercell.info[
            "interstitial_position"].split()[0]
        if interstitial_pos_type == "mean":
            neighbor_indices = [
                int(i) for i in
                bulk_supercell.info["interstitial_position"].split()[1:]
            ]
            if len(neighbor_indices) < 2:
                raise ValueError(
                    "interstitial position type mean, but {} < 2 indices".
                    format(len(neighbor_indices)))
            interstitial_pos = np.sum(
                bulk_supercell.get_positions()[neighbor_indices],
                axis=0) / float(len(neighbor_indices))
        else:
            raise ValueError("Unknown interstitial position type in '" +
                             bulk_supercell.info["interstitial_position"] +
                             "'")

    if isinstance(bulk_supercell.info["Zs"], list):
        Z_list = bulk_supercell.info["Zs"]
    else:
        Z_list = [bulk_supercell.info["Zs"]]
    for interstitial_Z in Z_list:

        try:
            relax_radial = bulk_supercell.info['relax_radial_{}'.format(
                interstitial_Z)]
        except KeyError:
            relax_radial = 0.0
        try:
            relax_symm_break = bulk_supercell.info[
                'relax_symm_break_{}'.format(interstitial_Z)]
        except KeyError:
            relax_symm_break = 0.0

        (label, unrelaxed_filename, Ef0,
         relaxed_filename, Ef, interstitial_i) = do_one_interstitial(
             bulk_supercell, bulk_supercell_pe, interstitial_Z,
             interstitial_pos, relax_radial, relax_symm_break, nn_cutoff, tol)

        # record results inside the loop, so every interstitial Z is kept
        # rather than only the last one
        properties["defects"][label] = {
            'Ef0': Ef0,
            'Ef': Ef,
            'unrelaxed_filename': unrelaxed_filename,
            'relaxed_filename': relaxed_filename,
            'atom_ind': int(interstitial_i),
            'Z': int(interstitial_Z)
        }
        if len(set(bulk_supercell.get_atomic_numbers())) != 1:
            properties["defects"][label]['dmu'] = [-1, int(interstitial_Z)]

    return properties
Example 15
def do_lattice(bulk, use_precon=True, elastic=True, tol=1.0e-3):

    print "unrelaxed bulk"
    ase.io.write(sys.stdout, bulk, format='extxyz')

    # use one of the routines from utilities module to relax the initial
    # unit cell and atomic positions
    if use_precon:
        bulk = relax_atoms_cell(bulk,
                                tol=tol,
                                traj_file="bulk.relax.extxyz",
                                symmetrize=True)
    else:
        bulk = relax_atoms_cell(bulk,
                                tol=tol,
                                traj_file=None,
                                method='cg_n',
                                symmetrize=True)

    print "relaxed bulk"
    ase.io.write(sys.stdout, bulk, format='extxyz')

    print "calculating elastic constants"
    precon = None
    if use_precon:
        precon = Exp(3.0)
    opt = lambda atoms, **kwargs: PreconLBFGS(atoms, precon=precon, **kwargs)
    if elastic:
        # reset calculator to the non-symmetrized one (not optimal, but the optimizer
        # used by fit_elastic_constants would otherwise need to reset symmetry for each relaxation)
        bulk.set_calculator(model.calculator)
        try:
            elastic_consts = matscipy.elasticity.fit_elastic_constants(
                bulk,
                symmetry='tetragonal_high',
                optimizer=opt,
                logfile=sys.stdout)
        except RuntimeError:
            # fallback on FIRE if we get a linesearch failure with LBFGS
            opt = FIRE
            elastic_consts = matscipy.elasticity.fit_elastic_constants(
                bulk,
                symmetry='tetragonal_high',
                optimizer=opt,
                logfile=sys.stdout)

        c11 = elastic_consts[0][0, 0] / GPa
        c33 = elastic_consts[0][2, 2] / GPa
        c12 = elastic_consts[0][0, 1] / GPa
        c13 = elastic_consts[0][0, 2] / GPa
        c44 = elastic_consts[0][3, 3] / GPa
        c66 = elastic_consts[0][5, 5] / GPa

    print "calculating E vs. V"
    V0 = bulk.get_volume()
    dV = bulk.get_volume() * 0.025
    E_vs_V = []
    scaled_bulk = bulk.copy()
    print "bulk going into E vs. V"
    ase.io.write(sys.stdout, scaled_bulk, format='extxyz')
    f = open("relaxed_E_vs_V_configs.xyz", "w")

    scaled_bulk = bulk.copy()
    constraints = []
    # scaled_bulk.arrays["move_mask_3"] = np.zeros((len(scaled_bulk),3), dtype=np.int)
    # scaled_bulk.arrays["move_mask_3"][:,0] = 1
    # for i in range(len(scaled_bulk)):
    # constraints.append(FixedLine_forces_only(i, (0.0, 0.0, 1.0)))
    # scaled_bulk.set_constraint(constraints)
    for i in range(0, -5 - 1, -1):
        print "doing volume step", i
        vcur = scaled_bulk.get_volume()
        scaled_bulk.set_cell(scaled_bulk.get_cell() *
                             ((V0 + i * dV) / vcur)**(1.0 / 3.0),
                             scale_atoms=True)
        try:
            scaled_bulk = relax_atoms_cell(scaled_bulk,
                                           tol=tol,
                                           traj_file=None,
                                           constant_volume=True,
                                           method='cg_n',
                                           symmetrize=True,
                                           max_steps=500)
        except Exception:
            break
        print "done relaxing step", i
        ase.io.write(f, scaled_bulk, format='extxyz')
        f.flush()
        E_vs_V.insert(0, (scaled_bulk.get_volume() / len(bulk),
                          scaled_bulk.get_potential_energy() / len(bulk)))
        evaluate(scaled_bulk)
        print "done evaluate step", i
        print "EV ", i, scaled_bulk.get_volume(
        ), scaled_bulk.get_potential_energy(), scaled_bulk.get_stress()

    scaled_bulk = bulk.copy()
    # scaled_bulk.arrays["move_mask_3"] = np.zeros((len(scaled_bulk),3), dtype=np.int)
    # scaled_bulk.arrays["move_mask_3"][:,0] = 1
    # scaled_bulk.set_constraint(constraints)
    for i in range(1, 6 + 1):
        print "doing volume step", i
        vcur = scaled_bulk.get_volume()
        scaled_bulk.set_cell(scaled_bulk.get_cell() *
                             ((V0 + i * dV) / vcur)**(1.0 / 3.0),
                             scale_atoms=True)
        try:
            scaled_bulk = relax_atoms_cell(scaled_bulk,
                                           tol=tol,
                                           traj_file=None,
                                           constant_volume=True,
                                           method='cg_n',
                                           symmetrize=True,
                                           max_steps=500)
        except Exception:
            break
        print "done relaxing step", i
        ase.io.write(f, scaled_bulk, format='extxyz')
        f.flush()
        E_vs_V.append((scaled_bulk.get_volume() / len(bulk),
                       scaled_bulk.get_potential_energy() / len(bulk)))
        evaluate(scaled_bulk)
        print "done evaluate step", i
        print "EV ", i, scaled_bulk.get_volume(
        ), scaled_bulk.get_potential_energy(), scaled_bulk.get_stress()

    f.close()

    for (V, E) in E_vs_V:
        print("EV_final ", V, E)

    if elastic:
        return (c11, c33, c12, c13, c44, c66, E_vs_V)
    else:
        return E_vs_V
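A natural consumer of the returned (V, E) pairs is an equation-of-state fit; a sketch using ASE's EquationOfState (the variable names are ours, and B is converted from eV/A^3 to GPa):

from ase.eos import EquationOfState
from ase.units import GPa

volumes, energies = zip(*E_vs_V)  # per-atom volumes and energies from above
eos = EquationOfState(list(volumes), list(energies))
v0, e0, B = eos.fit()
print("V0", v0, "E0", e0, "B", B / GPa, "GPa")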
Example 16
def main():
    """
    Entry point for list-extractor. Parses parameters and calls other modules in order to serialize a RDF graph.
    
    Takes following **command-line parameters**:
    
    * **collect_mode** : ``s`` or ``a``

        Use ``s`` to specify a single resource or ``a`` for a class of resources in the next parameter.

    * **source** : a string representing a class of resources from DBpedia ontology (find supported domains below),
    or a single Wikipedia page of any resource.

    * **language**: ``en``, ``it``, ``de``, ``es`` (for now, available only for selected domains)

        a two-letter prefix corresponding to the desired language of Wikipedia pages and SPARQL endpoint to be queried.

    * **-c --classname**: a string representing classnames you want to associate your resource with. Applicable only for
     ``collect_mode="s"``.

    """

    # initialize argparse parameters
    parser = argparse.ArgumentParser(description='Extract data from lists in Wikipedia pages and serialize it in RDF.'
                                                 '\nExample: `python listExtractor.py a Writer en`',
                                     formatter_class=argparse.RawTextHelpFormatter,
                                     usage="listExtractor.py [--help] collect_mode source language [pattern] "
                                           "\nUse listExtractor.py -h for more details.\n ")
    parser.add_argument('collect_mode',
                        help="'s' to specify a single Wikipedia page;\n'a' for all resources from a DBpedia class;"
                             "\n'l' for pattern-based list extraction from a single page;\n'q' for all pages found by the utilities module.\n ",
                        choices=['s', 'a', 'l', 'q'])
    parser.add_argument('source', type=str,
                        help='Select resource to extract lists from. Options are:'
                             '\nSingle Wikipedia page (example: William_Gibson) '
                             '\nDBpedia ontology class (example: Writer)\n ')
    parser.add_argument('language', type=str, choices=['en', 'it', 'de', 'es'], default='en',
                        help='Language prefix of Wikipedia pages to analyze.'
                             '\nen: English (Default)\nit: Italian\nde: German\nes: Spanish\n')
    parser.add_argument("-c", "--classname", type=str, help='Provide a classname from settings.json and use its'
                                                            '\nmapper functions')
    parser.add_argument('pattern', type=int, nargs='?', default=0,
                        help='Pattern that will be applied to the extraction (only used with collect_mode=l).')

    args = parser.parse_args()

    # initialize RDF graph which will contain the triples
    g = rdflib.Graph()
    g.bind("dbo", "http://dbpedia.org/ontology/")
    g.bind("dbr", "http://dbpedia.org/resource/")

    # start extracting lists from resources
    if args.collect_mode == 's':  # extract list information from a single resource
        # resource = args.source.encode('utf-8')  # apply utf-8 encoding
        resource = args.source.encode()
        # resource = args.source
        resDict = wikiParser.main_parser(args.language, resource)  # create a dict representing the resource

        for key in resDict:  # print data present in the resDict
            print(key, ":", resDict[key])
            print('')

        ''' The line below creates a file inside a resources folder containing the dictionary '''
        utilities.createResFile(resDict, args.language, resource)

        # Asks the endpoint for a list of types/classes associated to the resource
        if args.classname is None:
            rdf_type = utilities.get_resource_type(args.language, resource)
            print(rdf_type)
        else:
            rdf_type = [classes.strip() for classes in args.classname.split(',')]
            # print rdf_type

        list_elems = 0  # Used to keep track of the number of list elements extracted
        for t in rdf_type:  # for each type found, look for a suitable mapping and apply it
            list_elems += mapper.select_mapping(resDict, resource, args.language, t,
                                                g)  # get number of elements extracted
            # print '>>>>>', t, list_elems
        tot_list_elems = utilities.count_listelem_dict(resDict)  # count all list elements of the resource
        print("Total elements extracted: " + str(list_elems) + "/" + str(tot_list_elems))

    elif args.collect_mode == 'a':  # extract lists from a class of resources from DBpedia ontology (e.g. 'Writer')
        if utilities.check_existing_class(
                args.source):  # Check if the domain has already been mapped (in settings.json)
            try:
                print('Fetching resources, please wait......')
                resources = utilities.get_resources(args.language, args.source)
                res_num = len(resources)  # total number of resources
                curr_num = 1  # current resource to be analyzed
            except Exception:
                print("Could not find specified class of resources: " + args.source)
                sys.exit(0)
        else:
            print('\nThis domain has not been mapped yet!')
            print('You can add a mapping for this domain using rulesGenerator.py and try again...')
            sys.exit(0)

        tot_extracted_elems = 0  # Used to keep track of the number of list elements extracted
        tot_elems = 0  # Used to keep track of total number of list elements
        total_res_failed = 0
        print('Completed! Found', str(res_num), 'resources.\nStarting extraction....\n')
        for res in resources:
            try:
                print(res + " (" + str(curr_num) + " of " + str(res_num) + ")")
                resDict = wikiParser.main_parser(args.language, res)  # create a dict representing each resource
                tot_elems += utilities.count_listelem_dict(resDict)

                '''Uncomment the line below to create a file inside a resources folder containing the dictionary'''
                # utilities.createResFile(resDict, args.language, res)

            except Exception:  # handle parsing errors; no dict found or no relevant sections found
                print("Could not parse " + args.language + ":" + res)
                total_res_failed += 1
                curr_num += 1

            else:  # successfully parsed; proceed and form triples
                curr_num += 1
                print(">>> " + args.language + ":" + res + " has been successfully parsed <<<")
                extr_elems = mapper.select_mapping(resDict, res, args.language, args.source, g)
                mapper.mapped_domains = []  # reset domains already mapped for next resource
                tot_extracted_elems += extr_elems
                print(
                    ">>> Mapped " + args.language + ":" + res + ", extracted elements: " + str(extr_elems) + "  <<<\n")

        # evaluation metrics for the extraction process; store relevant stats in evaluation.csv
        utilities.evaluate(args.language, args.source, res_num, res_num - total_res_failed,
                           tot_extracted_elems, tot_elems, len(g))

    elif args.collect_mode == 'l':
        resource = args.source.encode()
        resDict = wikiParser.main_parser(args.language, resource)  # create a dict representing the resource
        pattern = args.pattern

        for key in resDict:  # print data present in the resDict
            print(key, ":", resDict[key])
            print('')

        ''' The line below creates a file inside a resources folder containing the dictionary '''
        utilities.createResFile(resDict, args.language, resource)

        list_elems = 0  # Used to keep track of the number of list elements extracted
        # for each type found, look for a suitable mapping and apply it
        list_elems += listMApper.list_select_mapping(resDict, resource, args.language, g, pattern)  # get number of elements extracted
        # print '>>>>>', t, list_elems
        tot_list_elems = utilities.count_listelem_dict(resDict)  # count all list elements of the resource
        print("Total elements extracted: " + str(list_elems) + "/" + str(tot_list_elems))

    elif args.collect_mode == 'q':
        pages = utilities.find_all_element()
        for p in pages:
            print(p)
        print("finish finding")
        for p in pages:
            resource = p.replace(" ", "_").encode()
            resDict = wikiParser.main_parser(args.language, resource)  # create a dict representing the resource

            for key in resDict:  # print data present in the resDict
                print(key, ":", resDict[key])
                print('')

            ''' The line below creates a file inside a resources folder containing the dictionary '''
            utilities.createResFile(resDict, args.language, resource)

            list_elems = 0  # Used to keep track of the number of list elements extracted
            # for each type found, look for a suitable mapping and apply it
            list_elems += listMApper.list_select_mapping(resDict, resource, args.language,
                                                         g)  # get number of elements extracted
            # print '>>>>>', t, list_elems
            tot_list_elems = utilities.count_listelem_dict(resDict)  # count all list elements of the resource
            print("Total elements extracted: " + str(list_elems) + "/" + str(tot_list_elems))


    # If the graph contains at least one statement, create a .ttl file with the RDF triples created
    g_length = len(g)
    if g_length > 0:
        file_name = "ListExtractor_" + args.source + "_" + args.language + "_" + utilities.getDate() + ".ttl"
        file_path = utilities.get_subdirectory('extracted', file_name)
        g.serialize(file_path, format="turtle")
        print(str(g_length) + " statements created. Triples serialized in: " + file_path)
    else:
        print("Could not serialize any RDF statement! :(")
Example 17
# set of utility routines specific to this model/testing framework, plus
# the current model; os and ase.io are needed by the code below
import os

import ase.io

import model
import utilities

force_component_errors = []
ats = ase.io.read(os.path.join(os.path.dirname(__file__),
                               'testing_database.xyz'),
                  index=':',
                  format='extxyz')
for at in ats:
    if len(at) > 1:
        at.wrap()
        # ase.io.write(sys.stdout, at, format="extxyz")
        # sys.stdout.flush()
        try:
            at = utilities.evaluate(at)
            # accept either spelling of the reference force array; skip
            # configurations that carry neither
            dft_f = None
            for key in ('dft_force', 'DFT_force'):
                if key in at.arrays:
                    dft_f = at.arrays[key]
                    break
            if dft_f is None:
                continue
            f = at.get_forces()
            c_t = at.info.get('config_type', 'NONE')
            for i in range(len(at)):
                force_component_errors.append(
                    (dft_f[i, 0], f[i, 0] - dft_f[i, 0], c_t))
        except Exception:
            # the model failed on this configuration; skip it
            pass
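A sketch of one downstream use for the collected (reference component, error, config type) tuples, a quick matplotlib scatter of model error against force magnitude (the output file name is an assumption):

import matplotlib.pyplot as plt

ref, err, _types = zip(*force_component_errors)
plt.scatter(ref, err, s=4)
plt.xlabel("DFT force component (eV/A)")
plt.ylabel("model - DFT (eV/A)")
plt.savefig("force_component_error_scatter.png")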
Example 18
def do_all_vacancies(test_dir, nn_cutoff=0.0, tol=1.0e-2):
    print("doing do_all_vacancies")
    bulk_supercell = ase.io.read(os.path.join(test_dir,"bulk_supercell.xyz"), format="extxyz")
    print("got bulk_supercell ", len(bulk_supercell))

    bulk = rescale_to_relaxed_bulk(bulk_supercell)
    # relax bulk supercell positions in case it's only approximate (as it must be for different models), but stick
    # to relaxed bulk's lattice constants as set by rescale_to_relaxed_bulk
    bulk_supercell = relax_config(bulk_supercell, relax_pos=True, relax_cell=False, tol=tol, save_traj=True,
        config_label="rescaled_bulk", from_base_model=True, save_config=True)

    ase.io.write(os.path.join("..",run_root+"-rescaled-bulk.xyz"),  bulk_supercell, format='extxyz')

    print("got bulk primitive cell ", bulk.get_cell())
    print("got rescaled bulk_supercell cell ", bulk_supercell.get_cell())

    if bulk_supercell.info['vacancies'] == "inequivalent":
        sym_data = spglib.get_symmetry_dataset(bulk_supercell, symprec=0.01)
        prim_vacancy_list = np.unique(sym_data["equivalent_atoms"])
        print("orig cell vacancy_list", prim_vacancy_list)
        if 'arb_supercell' in bulk_supercell.info:
            print("making bulk supercell from", bulk_supercell.info['arb_supercell'].reshape((3,3)) )
            bulk_supersupercell = ase.build.make_supercell(bulk_supercell,bulk_supercell.info['arb_supercell'].reshape((3,3)) )
            print("got supersupercell with ",len(bulk_supersupercell),"atoms, cell\n",bulk_supersupercell.get_cell())
            vacancy_list = []
            for i in prim_vacancy_list:
                p = bulk_supercell.get_positions()[i]
                dv = bulk_supersupercell.get_positions() - p
                dv_scaled = np.dot(dv, bulk_supersupercell.get_reciprocal_cell().T)
                dv -= np.dot(np.round(dv_scaled), bulk_supersupercell.get_cell())
                i_closest = np.argmin(np.linalg.norm(dv, axis=1))
                print("found closest in new cell", i_closest, "distance in orig cell lattice coords", np.dot((bulk_supersupercell.get_positions()[i_closest]-p), \
                                                                                                             bulk_supercell.get_reciprocal_cell().T))
                vacancy_list.append(i_closest)
            bulk_supersupercell.info.update(bulk_supercell.info)
            bulk_supercell = bulk_supersupercell
        else:
            vacancy_list = prim_vacancy_list
        print("final vacancy_list", vacancy_list)
    else:
        try:
            vacancy_list = [ int(i) for i in bulk_supercell.info['vacancies'] ]
        except (TypeError, ValueError):
            vacancy_list = [ int(bulk_supercell.info['vacancies']) ]

    evaluate(bulk_supercell)
    bulk_supercell_pe = bulk_supercell.get_potential_energy()
    properties = { "bulk_struct_test" : bulk_supercell.info["bulk_struct_test"], "bulk_E_per_atom" : bulk_supercell_pe / len(bulk_supercell), "defects" : {} }

    for vac_i in vacancy_list:
        # maybe set up a system to read these from xyz file?
        try:
            relax_radial = bulk_supercell.info['relax_radial_{}'.format(vac_i)]
        except KeyError:
            relax_radial = 0.0
        try:
            relax_symm_break = bulk_supercell.info['relax_symm_break_{}'.format(vac_i)]
        except KeyError:
            relax_symm_break = 0.0
        (label, unrelaxed_filename, Ef0, relaxed_filename, Ef, vac_Z, vac_pos) = do_one_vacancy(bulk_supercell, bulk_supercell_pe, vac_i, relax_radial, relax_symm_break, nn_cutoff, tol)

        properties["defects"][label] = { 'Ef0' : Ef0, 'Ef' : Ef, 'unrelaxed_filename' : unrelaxed_filename,'relaxed_filename' : relaxed_filename,
            'atom_ind' : int(vac_i), 'Z' : int(vac_Z), 'vac_pos' : vac_pos.tolist()}
        if len(set(bulk_supercell.get_atomic_numbers())) > 1:
            properties["defects"][label]["dmu"] = [1, vac_Z]

    print("returning properties", properties)
    return properties
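The int() casts and .tolist() above keep properties JSON-serializable; a caller might persist the result like this (the test directory name is illustrative):

import json

props = do_all_vacancies("example_vacancy_test", nn_cutoff=3.0)
with open(run_root + "-vacancy-properties.json", "w") as fh:
    json.dump(props, fh, indent=2)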