def collate_disc(output, infiles):
    with pd.HDFStore(output) as dst:
        for file in infiles:
            file = Path(file)
            print(file)
            df = pd.read_csv(file)
            fvars = get_filename_vars(file)
            df["temperature"] = float(fvars.temperature)
            df["pressure"] = float(fvars.pressure)
            if fvars.crystal is None:
                crystal = "liquid"
            else:
                crystal = fvars.crystal
            df["crystal"] = crystal

            bin_values, count = aggregate(df["hexatic_order"])

            df = pd.DataFrame({
                "temperature": float(df["temperature"].values[0]),
                "pressure": float(df["pressure"].values[0]),
                "crystal": df["crystal"].values[0],
                "bins": bin_values,
                "count": count,
                "probability": count * (BINS[1] - BINS[0]),
            })
            df["crystal"] = df["crystal"].astype(
                CategoricalDtype(
                    categories=["SquareCircle", "HexagonalCircle", "liquid"]))
            dst.append("ordering", df)
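
The BINS constant and aggregate helper used above (and in collate below) are not shown in these examples. A minimal sketch of definitions consistent with how they are used, assuming BINS is a uniform grid of bin edges and the probability column is meant to integrate to one; the real definitions may differ:

import numpy as np

# Hypothetical definitions, inferred from how collate_disc uses them.
BINS = np.linspace(0, 1, 101)  # uniform bin edges over the order parameter

def aggregate(values):
    """Histogram an order parameter, returning bin centres and a density."""
    # density=True normalises so that count * bin_width sums to one,
    # matching the probability column computed above.
    count, edges = np.histogram(values, bins=BINS, density=True)
    bin_centres = (edges[:-1] + edges[1:]) / 2
    return bin_centres, count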
Example #2
def read_all_files(pathname: Path,
                   index: int = 0,
                   pattern: str = "dump-Trimer-*.gsd"
                   ) -> List[Tuple[Variables, HoomdFrame]]:
    """Read all the gsd files from a directory given a pattern.

    A utility function for reading all the gsd files from a given directory,
    with the option of using a glob pattern to match a subset.

    Args:
        pathname: The directory from which the files will be read.
        index: The index of the snapshot to read for each trajectory.
        pattern: The pattern passed to the glob function to match.

    Returns:
        A list of tuples, each containing the variables extracted from the filename
        along with the corresponding frame.

    """
    pathname = Path(pathname)
    snapshots = []
    for filename in sorted(glob.glob(str(pathname / pattern))):
        logger.debug("Reading %s", Path(filename).stem)
        with gsd.hoomd.open(str(filename)) as trj:
            try:
                snapshots.append(
                    (get_filename_vars(filename), HoomdFrame(trj[index])))
            except IndexError:
                continue
    if not snapshots:
        logger.warning(
            "There were no files found with a configuration at index %s",
            index)
    return snapshots
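
A hypothetical usage of read_all_files, with an illustrative directory of Trimer dump files:

# Illustrative only: the directory is hypothetical.
snapshots = read_all_files(Path("data/dumps"), index=0)
for variables, frame in snapshots:
    print(variables.temperature, variables.pressure, variables.crystal)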
def collate(output, infiles):
    with pd.HDFStore(output) as dst:
        for file in infiles:
            file = Path(file)
            print(file)
            if file.suffix == ".h5":
                with pd.HDFStore(file) as src:
                    df = src.get("ordering")
            elif file.suffix == ".csv":
                df = pd.read_csv(file)
                df = df.rename(columns={"orient_order": "orientational_order"})
                fvars = get_filename_vars(file)
                df["temperature"] = float(fvars.temperature)
                df["pressure"] = float(fvars.pressure)
                if fvars.crystal is None:
                    crystal = "liquid"
                else:
                    crystal = fvars.crystal
                df["crystal"] = crystal
            else:
                raise ValueError("Filetype is not supported")

            bin_values, count = aggregate(df["orientational_order"])

            df = pd.DataFrame({
                "temperature": float(df["temperature"].values[0]),
                "pressure": float(df["pressure"].values[0]),
                "crystal": df["crystal"].values[0],
                "bins": bin_values,
                "count": count,
                "probability": count * (BINS[1] - BINS[0]),
            })
            df["crystal"] = df["crystal"].astype(
                CategoricalDtype(categories=["p2", "p2gg", "pg", "liquid"]))
            dst.append("ordering", df)
Example #4
def read_all_files(
    pathname: Path, index: int = 0, pattern: str = "dump-Trimer-*.gsd"
) -> List[Tuple[Variables, HoomdFrame]]:
    pathname = Path(pathname)
    snapshots = []
    for filename in glob.glob(str(pathname / pattern)):
        logger.debug("Reading %s", Path(filename).stem)
        with gsd.hoomd.open(str(filename)) as trj:
            try:
                snapshots.append((get_filename_vars(filename), HoomdFrame(trj[index])))
            except IndexError:
                continue
    if not snapshots:
        logger.warning(
            "There were no files found with a configuration at index %s", index
        )
    return snapshots
Example #5
def test_get_filename_vars(prefix, mol, press, temp, crys, swapped):
    if swapped:
        if crys is None:
            fname = f"{prefix}{mol}-T{temp}-P{press}.gsd"
        else:
            fname = f"{prefix}{mol}-T{temp}-P{press}-{crys}.gsd"
    else:
        if crys is None:
            fname = f"{prefix}{mol}-P{press}-T{temp}.gsd"
        else:
            fname = f"{prefix}{mol}-P{press}-T{temp}-{crys}.gsd"

    var = get_filename_vars(fname)
    assert isinstance(var.temperature, str)
    assert var.temperature == temp
    assert isinstance(var.pressure, str)
    assert var.pressure == press
    assert isinstance(var.crystal, type(crys))
    assert var.crystal == crys
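
The tests above pin down the behaviour of get_filename_vars without showing its implementation. A minimal sketch consistent with those assertions, assuming the fields are parsed from the filename stem; the names FileVars and get_filename_vars_sketch are hypothetical and the real implementation (which also handles iteration_id) may differ:

import re
from pathlib import Path
from typing import NamedTuple, Optional

class FileVars(NamedTuple):
    """Hypothetical stand-in for the Variables type returned by get_filename_vars."""
    temperature: Optional[str]
    pressure: Optional[str]
    crystal: Optional[str]

def get_filename_vars_sketch(filename) -> FileVars:
    """Parse T/P values and an optional trailing crystal label from a filename."""
    fields = Path(filename).stem.split("-")
    temperature = pressure = crystal = None
    for field in fields:
        if re.fullmatch(r"T\d+(\.\d+)?", field):
            temperature = field[1:]
        elif re.fullmatch(r"P\d+(\.\d+)?", field):
            pressure = field[1:]
    # A final field that is neither a T nor a P value is the crystal label.
    if pressure is not None and not re.fullmatch(r"[TP]\d+(\.\d+)?", fields[-1]):
        crystal = fields[-1]
    return FileVars(temperature, pressure, crystal)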
Example #6
def read_all_files(directory: Path,
                   index: int = 0,
                   glob: str = "dump-*") -> List[SnapshotData]:
    directory = Path(directory)
    snapshots = []
    for file in directory.glob(glob):
        with gsd.hoomd.open(str(file), "rb") as trj:
            try:
                snap = HoomdFrame(trj[index])
            except IndexError:
                logger.warning(
                    "Index %d in input file %s doesn't exist, continuing...",
                    index,
                    file.name,
                )
                # Without this continue, the append below would raise a NameError
                # (or reuse a stale snap) whenever a frame is missing.
                continue
            snapshots.append(
                SnapshotData.from_variables(snap,
                                            util.get_filename_vars(file)))
    return snapshots
def analyse(infile, outfile):
    dataframes = []
    file_vars = get_filename_vars(infile)
    crystal = file_vars.crystal
    if crystal is None:
        crystal = "liquid"
    for snap in open_trajectory(infile, progressbar=True):
        orientational_order = order.orientational_order(
            snap.box, snap.position, snap.orientation)
        df = pd.DataFrame({
            "molecule": np.arange(snap.num_mols),
            "orientational_order": orientational_order,
            "temperature": float(file_vars.temperature),
            "pressure": float(file_vars.pressure),
            "crystal": crystal,
        })
        df["crystal"] = df["crystal"].astype("category")
        dataframes.append(df)
    with pd.HDFStore(outfile) as dst:
        dst.append("ordering", pd.concat(dataframes))
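
The ordering table written by analyse can later be read back as a single DataFrame; a hypothetical example with an illustrative filename:

# Illustrative only: "order.h5" is a hypothetical output of analyse.
df = pd.read_hdf("order.h5", "ordering")
print(df.groupby("crystal", observed=True)["orientational_order"].mean())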
def thermodynamics(outfile, infiles):
    dfs = []
    for filename in infiles:
        fvars = get_filename_vars(filename)

        df = pd.read_csv(filename, sep="\t")
        # All the values are written to the same output file, so make sure there is only
        # a single trajectory worth of values.
        df = df.drop_duplicates("timestep", keep="last")
        # We want intensive, per-molecule quantities, so divide every column by N
        df = df.div(df.N, axis=0)

        # Take the second half of the values to ensure there is no issue with
        # equilibration
        df = df.iloc[len(df) // 2:, :]

        # Calculate Total Energy
        df["total_energy"] = df["kinetic_energy"] + df["potential_energy"]

        # Calculate enthalpy.
        # This is the total energy (potential + kinetic) plus the configuration
        # energy (pV). Both pressure and volume were divided by N above, so the
        # pV product carries a factor of 1/N^2; multiplying by N restores a
        # per-molecule quantity.
        df["enthalpy"] = (df["potential_energy"] + df["kinetic_energy"] +
                          df["pressure"] * df["volume"] * df.N)

        if fvars.crystal is not None:
            df["crystal"] = fvars.crystal
        else:
            df["crystal"] = "liquid"
        df["pressure"] = float(fvars.pressure)
        df["temperature"] = float(fvars.temperature)
        df = df.set_index(["pressure", "temperature", "crystal"])

        # Perform aggregations on the dataframe, making it much easier to work with.
        df = df.groupby(["pressure", "temperature",
                         "crystal"]).agg(["mean", "std"])

        dfs.append(df)

    pd.concat(dfs).to_hdf(outfile, "thermo")
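
The resulting table is indexed by (pressure, temperature, crystal) and holds ("mean", "std") column pairs; a hypothetical read-back with an illustrative filename:

# Illustrative only: "thermo.h5" is a hypothetical output of thermodynamics.
thermo = pd.read_hdf("thermo.h5", "thermo")
print(thermo[("enthalpy", "mean")])  # mean enthalpy per molecule at each state point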
Example #9
def collate(output, infiles):
    with pd.HDFStore(output, "w") as dst:
        key = "fractions"
        for file in infiles:
            print(file)
            if file.endswith(".h5"):
                with pd.HDFStore(file) as src:
                    df = src.get(key)
            elif file.endswith(".csv"):
                try:
                    df = pd.read_csv(file)
                except pd.errors.EmptyDataError:
                    logger.warning("File %s is empty.", file)
                    continue

                df["class"] = df["class"].astype(
                    pd.CategoricalDtype(categories=["Liquid", "P2", "P2GG", "PG"])
                )
                df = (
                    df.groupby(["timestep", "class"])["area"].sum().to_frame().unstack()
                )
                df.columns = [c[1] for c in df.columns]
                df["volume"] = df.loc[:, df.columns != "Liquid"].sum(axis=1)
                df = df.reset_index()

                file_vars = get_filename_vars(file)
                df = df.rename(columns={"area": "volume"})
                df["crystal"] = file_vars.crystal
                df["temperature"] = float(file_vars.temperature)
                df["pressure"] = float(file_vars.pressure)
                if file_vars.iteration_id is not None:
                    df["iter_id"] = int(file_vars.iteration_id)
            else:
                # Fail loudly on unsupported filetypes, matching collate above,
                # instead of silently reprocessing the previous file's dataframe.
                raise ValueError("Filetype is not supported")

            df["crystal"] = df["crystal"].astype(
                CategoricalDtype(categories=["p2", "pg", "p2gg"])
            )
            df["time"] = df["timestep"] * 0.005
            dst.append(key, df)
Example #10
def test_filename_vars_id(filename, expected):
    variables = get_filename_vars(filename)
    assert variables == expected
Example #11
def compute_crystal_growth(
    infile: Path, outfile: Path = None, skip_frames: int = 100
) -> Optional[pd.DataFrame]:
    fvars = get_filename_vars(infile)
    order_list = []

    ml_order = order.create_ml_ordering(KNNModel)
    voronoi = freud.voronoi.Voronoi(freud.box.Box(10, 10), buff=4)
    for index, snap in enumerate(open_trajectory(infile, progressbar=True)):
        if index % skip_frames != 0:
            continue

        classification = ml_order(snap)
        labels = spatial_clustering(snap, classification)

        # The crystal should always be smaller, so ensure it has label 1
        if sum(labels) > len(labels) / 2:
            labels = -labels + 1

        # Compute Voroni cells and volumes
        voronoi.compute(snap.position, box=snap.box)
        voronoi.computeVolumes()
        # Sum the volumes of the crystalline molecules
        voronoi_volume = np.sum(voronoi.volumes[labels == 1])

        hull_area = 0.0
        hull_volume = 0.0
        # Only compute the ConvexHull when there are more than 5 particles
        if np.sum(labels == 1) > 5:
            hull = ConvexHull(snap.position[labels == 1, :2])
            hull_area = hull.area
            hull_volume = hull.volume

        if fvars.iteration_id is None:
            iter_id = 1
        else:
            iter_id = fvars.iteration_id
        states = CrystalFractions.from_ordering(classification)
        df = {
            "temperature": float(fvars.temperature),
            "pressure": float(fvars.pressure),
            "crystal": fvars.crystal,
            "iter_id": int(iter_id),
            "liq": float(states.liquid),
            "p2": float(states.p2),
            "p2gg": float(states.p2gg),
            "pg": float(states.pg),
            "surface_area": float(hull_area),
            "volume": float(hull_volume),
            "voronoi_volume": float(voronoi_volume),
            "timestep": int(snap.timestep),
            "time": int(snap.timestep) * 0.005,
        }

        order_list.append(df)

    # Build the DataFrame once, after the whole trajectory has been processed,
    # rather than rebuilding it on every frame inside the loop.
    order_df = pd.DataFrame.from_records(order_list)
    order_df.time = order_df.time.astype(np.uint32)
    logger.debug("Value of outfile is: %s", outfile)
    if outfile is None:
        return order_df

    order_df.to_hdf(outfile, "fractions", format="table", append=True, min_itemsize=4)
    return None
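
A hypothetical invocation of compute_crystal_growth; when no outfile is given the DataFrame is returned directly (the trajectory path is illustrative):

# Illustrative only: the input trajectory path is hypothetical.
growth = compute_crystal_growth(Path("dump-Trimer-P1.00-T0.40-p2.gsd"))
if growth is not None:
    print(growth[["time", "volume", "voronoi_volume"]].tail())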