def main():
    """Build the Pandas datasets from a local ThermoML Archive mirror.

    Command-line options:
      --journalprefix  only XML files whose names start with this prefix
                       are globbed (default: every ``*.xml`` file).
      --path           directory holding the local mirror.

    The mirror directory is chosen in this order: ``--path``, then the
    ``THERMOML_PATH`` environment variable, then ``$HOME/.thermoml``.

    The matching XML files are handed to
    ``thermopyl.utils.build_pandas_dataframe`` and the two objects it
    returns are written into the mirror directory as ``data.h5`` and
    ``compound_name_to_formula.h5``, both under the HDF key ``'data'``.
    """
    # Parse command-line arguments.
    parser = argparse.ArgumentParser(
        description='Build a Pandas dataset from local ThermoML Archive mirror.')
    parser.add_argument(
        '--journalprefix', dest='journalprefix', metavar='JOURNALPREFIX',
        action='store', type=str, default=None,
        help='journal prefix to use in globbing *.xml files')
    parser.add_argument(
        '--path', dest='path', metavar='path',
        action='store', type=str, default=None,
        help='path to local ThermoML Archive mirror')
    args = parser.parse_args()

    # Get location of local ThermoML Archive mirror.
    if args.path is not None:
        # argparse.Namespace exposes options as attributes; it is not
        # subscriptable, so args['path'] would raise TypeError.
        xml_path = args.path
    elif 'THERMOML_PATH' in os.environ:
        xml_path = os.environ['THERMOML_PATH']
    else:
        # DEFAULT LOCATION. HOME is only consulted when it is actually
        # needed, and a missing HOME falls back to the platform's notion
        # of the user's home directory instead of raising KeyError.
        home = os.environ.get('HOME')
        if home is None:
            home = os.path.expanduser('~')
        xml_path = os.path.join(home, '.thermoml')

    # Get path for XML files.
    if args.journalprefix is not None:
        pattern = '%s*.xml' % args.journalprefix
    else:
        pattern = '*.xml'
    filenames = glob.glob(os.path.join(xml_path, pattern))

    # Process data.
    from thermopyl.utils import build_pandas_dataframe
    data, compound_dict = build_pandas_dataframe(filenames)
    # The HDF key is passed by keyword so the call does not depend on the
    # positional order of to_hdf's parameters.
    data.to_hdf(os.path.join(xml_path, 'data.h5'), key='data')
    compound_dict.to_hdf(
        os.path.join(xml_path, 'compound_name_to_formula.h5'), key='data')
def test_build_pandas_dataframe():
    """Round-trip a dataframe built from one ThermoML file through HDF5.

    Builds the data and compound tables from ``je8006138.xml``, writes
    them to a temporary directory as ``data.h5`` and
    ``compound_name_to_formula.h5``, then calls ``pandas_dataframe`` with
    that directory as ``thermoml_path``. The test passes if none of
    these steps raises.
    """
    import shutil

    from thermopyl.utils import build_pandas_dataframe, pandas_dataframe

    tmpdir = tempfile.mkdtemp()
    try:
        # Generate dataframe
        filenames = [get_fn("je8006138.xml")]
        data, compounds = build_pandas_dataframe(filenames)

        # Write as HDF5
        data.to_hdf(os.path.join(tmpdir, 'data.h5'), key='data')
        compounds.to_hdf(
            os.path.join(tmpdir, 'compound_name_to_formula.h5'), key='data')

        # Read dataframe; only the absence of an exception is checked.
        pandas_dataframe(thermoml_path=tmpdir)
    finally:
        # Clean up tmpdir even when a step above fails, so a failing test
        # does not leave the temporary directory behind.
        shutil.rmtree(tmpdir)
def main():
    """Build the Pandas datasets from a local ThermoML Archive mirror.

    Command-line options:
      --journalprefix  only XML files whose names start with this prefix
                       are globbed (default: every ``*.xml`` file).
      --path           directory holding the local mirror.

    The mirror directory is chosen in this order: ``--path``, then the
    ``THERMOML_PATH`` environment variable, then ``$HOME/.thermoml``.

    The matching XML files are handed to
    ``thermopyl.utils.build_pandas_dataframe`` and the two objects it
    returns are written into the mirror directory as ``data.h5`` and
    ``compound_name_to_formula.h5``, both under the HDF key ``'data'``.
    """
    # Parse command-line arguments.
    parser = argparse.ArgumentParser(
        description='Build a Pandas dataset from local ThermoML Archive mirror.'
    )
    parser.add_argument(
        '--journalprefix', dest='journalprefix', metavar='JOURNALPREFIX',
        action='store', type=str, default=None,
        help='journal prefix to use in globbing *.xml files')
    parser.add_argument(
        '--path', dest='path', metavar='path',
        action='store', type=str, default=None,
        help='path to local ThermoML Archive mirror')
    args = parser.parse_args()

    # Get location of local ThermoML Archive mirror.
    if args.path is not None:
        # argparse.Namespace exposes options as attributes; it is not
        # subscriptable, so args['path'] would raise TypeError.
        xml_path = args.path
    elif 'THERMOML_PATH' in os.environ:
        xml_path = os.environ['THERMOML_PATH']
    else:
        # DEFAULT LOCATION. HOME is only consulted when it is actually
        # needed, and a missing HOME falls back to the platform's notion
        # of the user's home directory instead of raising KeyError.
        home = os.environ.get('HOME')
        if home is None:
            home = os.path.expanduser('~')
        xml_path = os.path.join(home, '.thermoml')

    # Get path for XML files.
    if args.journalprefix is not None:
        pattern = '%s*.xml' % args.journalprefix
    else:
        pattern = '*.xml'
    filenames = glob.glob(os.path.join(xml_path, pattern))

    # Process data.
    from thermopyl.utils import build_pandas_dataframe
    data, compound_dict = build_pandas_dataframe(filenames)
    # The HDF key is passed by keyword so the call does not depend on the
    # positional order of to_hdf's parameters.
    data.to_hdf(os.path.join(xml_path, 'data.h5'), key='data')
    compound_dict.to_hdf(
        os.path.join(xml_path, 'compound_name_to_formula.h5'), key='data')