Ejemplo n.º 1
0
def main():
    """Convert every .xyz file in a directory to PIF records in one JSON file.

    Command-line arguments:
        --input_file_dir: directory containing the .xyz files to convert.
        --output_json_path: path of the JSON file the PIF records are
            written to (one ``pif.dump`` per converted file).
    """
    property_names_units = get_property_names_units()
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file_dir",
                        type=str,
                        required=True,
                        help="the input file directory")
    parser.add_argument("--output_json_path",
                        type=str,
                        required=True,
                        help="the output json file path")
    args = parser.parse_args()
    input_path = args.input_file_dir
    output_path = args.output_json_path
    num = 0
    # Context manager guarantees the output file is closed even if one of
    # the conversions raises (the original leaked the handle on error).
    with open(output_path, 'w') as f:
        for file in os.listdir(input_path):  # convert each file to pd.DataFrame then output with pif
            if '.xyz' not in file:
                continue
            df = file_to_pd(file, input_path, property_names_units)
            if df is not None:
                chemical_system = pd_to_pifmat(df)
                pif.dump(chemical_system, f, indent=4)
                num += 1
                if num % 5 == 0:
                    print('%d files have been processed' % num)
    print('In total %d files have been processed' % num)
Ejemplo n.º 2
0
def test_upload_pif():
    """
    Tests that a PIF can be created, serialized, uploaded
    then downloaded and deserialized
    """
    record = System()
    record.id = 0
    uid = random_string()
    record.uid = uid

    with open("tmp.json", "w") as fp:
        dump(record, fp)
    assert client.upload(dataset_id, "tmp.json").successful()

    # Poll until the record becomes visible; give up after 11 attempts
    # (roughly ten seconds of waiting), re-raising the final miss.
    for attempt in range(11):
        try:
            record = client.get_pif(dataset_id, uid)
            break
        except ResourceNotFoundException:
            if attempt == 10:
                raise
            time.sleep(1)

    assert client.get_ingest_status(dataset_id) == "Finished"
    with open("tmp.json", "r") as fp:
        assert json.loads(fp.read())["uid"] == record.uid
Ejemplo n.º 3
0
    def run(self):
        """Serialize the input pif to a JSON file and optionally upload it.

        Reads client, dataset id, pif and target path pieces from
        ``self.inputs``; stores the upload response (or an error message)
        in ``self.outputs['response']``. Returns early when any required
        input is missing.
        """
        cl = self.inputs['client']
        dsid = self.inputs['dsid']
        p = self.inputs['pif']
        json_dir = self.inputs['json_dirpath']
        json_file = self.inputs['json_filename']
        if cl is None or dsid is None or p is None \
        or json_dir is None or json_file is None:
            return
        # BUGFIX: os.path.splitext() keeps the leading dot, so the original
        # comparison against 'json' was always False and '.json' was
        # appended even when the name already ended with it.
        if not os.path.splitext(json_file)[1] == '.json':
            json_file = json_file+'.json'
        json_file = os.path.join(json_dir,json_file)

        json_flag = self.inputs['keep_json']
        ship_flag = self.inputs['ship_flag']
        try:
            # make p an array of pifs to get a big json that has all records
            # (context manager closes the handle; the original leaked it)
            with open(json_file, 'w') as f:
                pif.dump(p, f)
            if ship_flag:
                r = cl.upload_file(json_file,dataset_id = dsid)
            else:
                r = 'dry run: no shipment occurred. pif object: {}'.format(pif.dumps(p))
            if not json_flag:
                os.remove(json_file)
        except Exception as ex:
            # BUGFIX: Exception.message does not exist on Python 3.
            r = 'An error occurred while shipping. Error message: {}'.format(str(ex))
        self.outputs['response'] = r
Ejemplo n.º 4
0
def main ():
    """Collect samples from the requested Faustson plate/build sources and
    write each sample to its own pif JSON file, optionally tarring the
    output directory.

    Uses the module-level ``args`` namespace (``sources``, ``output``,
    ``duplicate_error``, ``create_archive``).
    """
    global args
    samples = []
    # ####################################
    # read
    # ####################################
    for source in args.sources:
        try:
            subset = {
                'faustson-plate1-build1' : P001B001().samples,
                'faustson-plate2-build1' : P002B001().samples,
                'faustson-plate3-build1' : P003B001().samples,
                'faustson-plate4-build1' : P004B001().samples,
                'faustson-plate5-build1' : P005B001().samples,
                'faustson-plate5-build2' : P005B002().samples,
                'faustson-plate6-build1' : P006B001().samples
            }[source.lower()]
        except KeyError:
            raise ValueError('{source:} is not a recognized source.'.format(
                source=source))
        samples.extend(subset)
    # ####################################
    # write
    # ####################################
    # To improve traceability of the samples and their history, each sample
    # should be uploaded separately, i.e. as a separate file. So rather than
    # storing these in a single file, create a directory to store each sample
    # as a separate file in that directory, then tar and zip the directory.
    directory = args.output
    directory = make_directory(directory, retry=0)
    for sample in samples:
        # generate JSON string
        jstr = pif.dumps(sample, indent=4)
        # create a filename from the contents of the record
        try:
            ofile = filename_from(jstr, directory=directory)
        except IOError:
            # BUGFIX: `ofile` is unbound when filename_from raises, so the
            # original format call raised NameError here; identify the
            # duplicate by its URN instead.
            msg = 'Sample {} is duplicated.'.format(get_urn(jstr))
            if not args.duplicate_error:
                # BUGFIX: add the missing space between the message and
                # "Skipping." in the concatenated warning text.
                sys.stdout.write('WARNING: {} ' \
                                 'Skipping.\n'.format(msg))
                continue
            else:
                msg = 'ERROR: {} To skip duplicates, invoke the ' \
                      '--duplicate-warning flag.'.format(msg)
                shutil.rmtree(directory)
                raise IOError(msg)
        # Add the UID to the record
        urn = get_urn(jstr)
        sample.uid = urn
        # write the file
        with open(ofile, 'w') as ofs:
            pif.dump(sample, ofs)
    # tarball and gzip the new directory
    if args.create_archive:
        tarball = '{}.tgz'.format(directory)
        with tarfile.open(tarball, 'w:gz') as tar:
            tar.add(directory)
        shutil.rmtree(directory)
Ejemplo n.º 5
0
def test_file_list(mark10_no_stress, generate_output):
    """Converting [SOURCE] must serialize to the stored reference JSON."""
    converted = converter([SOURCE])
    if generate_output:
        # regenerate the reference file, then fail so the run is noticed
        target = '{}/data/mark10-no-stress.json'.format(HERE)
        with open(target, 'w') as ofs:
            pif.dump(converted, ofs, sort_keys=True)
        assert False
    serialized = pif.dumps(converted, sort_keys=True).strip()
    assert serialized == mark10_no_stress
Ejemplo n.º 6
0
def test_mises_with_time(aramis_mises_with_time, generate_output):
    """Converted MISES data (0.5 timestep) must match the stored reference."""
    converted = converter(MISES, timestep=0.5)
    if generate_output:
        # regenerate the reference file, then fail so the run is noticed
        target = '{}/data/aramis-mises-with-time.json'.format(HERE)
        with open(target, 'w') as ofs:
            pif.dump(converted, ofs, sort_keys=True)
        assert False
    serialized = pif.dumps(converted, sort_keys=True)
    assert serialized == aramis_mises_with_time
Ejemplo n.º 7
0
def test_ey_strain_with_time(aramis_ey_strain_with_time, generate_output):
    """Converted EY-strain data (0.5 timestep) must match the reference."""
    converted = converter(EYSTRAIN, timestep=0.5)
    if generate_output:
        # regenerate the reference file, then fail so the run is noticed
        target = '{}/data/aramis-ey_strain-with-time.json'.format(HERE)
        with open(target, 'w') as ofs:
            pif.dump(converted, ofs, sort_keys=True)
        assert False
    serialized = pif.dumps(converted, sort_keys=True)
    assert serialized == aramis_ey_strain_with_time
Ejemplo n.º 8
0
def make_pif(filename):
    """
    Turn a CSV file into a PIF for this BMG-based dataset.

    Edit the extracted columns below to pull out whatever properties are
    needed; for a large number of columns, read the header row instead
    and iterate it.

    :param filename: Path to the CSV file you'd like to convert to PIF.
    :type filename: str
    :return: Path to the JSON output from converting to PIF
    :rtype: str
    """
    # Read in csv file using pandas to make df
    new_data = pd.read_csv(filename)

    # Pull out the desired properties and turn them to lists.
    form = new_data['formula']
    energy = new_data['PROPERTY: Nearest DFT Formation Energy (eV)']
    tg = new_data['PROPERTY: Tg (K)']

    # Renamed from `input`, which shadowed the builtin of the same name.
    systems = []

    # Make pifs -- iterate the columns in lockstep instead of indexing
    for formula, e_val, tg_val in zip(form, energy, tg):
        # Create a new chemical system (from the pypif package)
        chemical_system = ChemicalSystem()
        # Set the formula
        chemical_system.chemical_formula = formula
        # Create some properties to add.
        # If you have conditions, the format is slightly different and the
        # appropriate info can be found on the Citrination knowledgebase.
        dft_energy = Property(name='Nearest DFT Formation Energy',
                              units='eV',
                              scalars=float(e_val))
        tg_prop = Property(name='Tg', units='K', scalars=float(tg_val))
        # add the properties to the chemical system
        chemical_system.properties = [dft_energy, tg_prop]
        # add the system to the list
        systems.append(chemical_system)

    # Write the list of systems to JSON using pif.dump from pypif
    outfile = "pif.json"
    with open(outfile, 'w') as fp:
        pif.dump(systems, fp)

    # Return the name of the file you wrote the pif to
    return outfile
Ejemplo n.º 9
0
def test_stress(mark10_with_stress, generate_output):
    """Converting with a cross-sectional area must match the reference JSON."""
    area = 12.9  # mm^2 (the unused local `units` was removed)
    converted = converter(SOURCE, area=area)
    if generate_output:
        # regenerate the reference file, then fail so the run is noticed
        with open('{}/data/mark10-with-stress.json'.format(HERE), 'w') as ofs:
            pif.dump(converted, ofs, sort_keys=True)
        assert False
    serialized = pif.dumps(converted, sort_keys=True).strip()
    assert serialized == mark10_with_stress
def test_upload_pif():
    """Create a tutorial dataset and upload a minimal serialized pif to it."""
    client = CitrinationClient(environ['CITRINATION_API_KEY'], environ['CITRINATION_SITE'])
    created = client.create_data_set(name="Tutorial dataset", description="Dataset for tutorial", share=0)
    dataset = loads(created.content.decode('utf-8'))['id']
    record = System()
    record.id = 0

    with open("tmp.json", "w") as fp:
        dump(record, fp)
    response = loads(client.upload_file("tmp.json", dataset))
    assert response["message"] == "Upload is complete."
Ejemplo n.º 11
0
 def run(self):
     """Write the input pif to ``<dirpath>/<filename>`` as JSON.

     Returns early when any of pif, dirpath or filename is missing from
     ``self.inputs``; ensures the output name carries a .json extension.
     """
     p = self.inputs['pif']
     dp = self.inputs['dirpath']
     fn = self.inputs['filename']
     if dp is None or fn is None or p is None:
         return
     # BUGFIX: os.path.splitext() keeps the leading dot, so the original
     # comparison against 'json' was always False and '.json' was appended
     # even when the name already ended with it.
     if not os.path.splitext(fn)[1] == '.json':
         fn = fn + '.json'
     json_file = os.path.join(dp, fn)
     # context manager closes the file (the original leaked the handle)
     with open(json_file, 'w') as f:
         pif.dump(p, f)
Ejemplo n.º 12
0
	def test_linkages_sagittariidae(self):
		"""Attach a sagittariidae link to every record in the package pif
		and write the linked records out for inspection.
		"""
		# (removed a stale commented-out `# try:` left from debugging)
		with open('data/package.json') as ifs:
			pifdata = pif.load(ifs)
		add_link = sagittariidae.link_factory(
			projectID='nq3X4-concept-inconel718',
			host='http://sagittariidae.adapt.mines.edu')
		for p in pifdata:
			add_link(p)
		with open('delme.json', 'w') as ofs:
			pif.dump(pifdata, ofs)
Ejemplo n.º 13
0
def test_frame_to_pif(frame, file='./test.json', nest_sub_systems=False):
    """
    Save a PIF file from a pandas DataFrame.

    Args:
        frame (PifFrame) DataFrame containing System entries.
        file (str) File name to write to.
        nest_sub_systems (bool) Whether sub-systems are nested on export.
    """
    exported = frame.to_pif_systems(nest_sub_systems=nest_sub_systems)
    with open(file, 'w') as handle:
        pif.dump(exported, handle, indent=4)
Ejemplo n.º 14
0
def test_upload_pif():
    """Create a dataset on the staging site and upload a minimal pif from
    a temporary directory."""
    client = CitrinationClient(environ['CITRINATION_API_KEY'], 'https://stage.citrination.com')
    created = client.create_data_set(name="Tutorial dataset", description="Dataset for tutorial", share=0)
    dataset = loads(created.content.decode('utf-8'))['id']
    record = System()
    record.id = 0

    with TemporaryDirectory() as tmpdir:
        tempname = join(tmpdir, "pif.json")
        with open(tempname, "w") as fp:
            dump(record, fp)
        response = loads(client.upload_file(tempname, dataset))
    assert response["message"] == "Upload is complete."
Ejemplo n.º 15
0
def test_upload_pif():
    """Create a tutorial dataset and upload a minimal serialized pif."""
    client = CitrinationClient(environ['CITRINATION_API_KEY'],
                               environ['CITRINATION_SITE'])
    created = client.create_data_set(name="Tutorial dataset",
                                     description="Dataset for tutorial",
                                     share=0)
    dataset = loads(created.content.decode('utf-8'))['id']
    record = System()
    record.id = 0

    with open("tmp.json", "w") as fp:
        dump(record, fp)
    response = loads(client.upload_file("tmp.json", dataset))
    assert response["message"] == "Upload is complete."
Ejemplo n.º 16
0
 def ship_dataset(self,pifs):
     """Create a Citrination data set and upload each pif record to it.

     Records a per-uid status in ``self.return_codes``: 1 on success,
     -1 on any failure.
     """
     # Create the data set
     response = self.ctn_client.create_data_set()
     dsid = response.json()['id']
     # TODO: Note that the entire data set can be one json,
     # of an array of pif records, and this will lead to a faster upload.
     for p in pifs:
         try:
             json_file = pawstools.scratchdir+'/'+p.uid+'.json'
             # context manager closes the handle (the original leaked it)
             with open(json_file, 'w') as f:
                 pif.dump(p, f)
             # BUGFIX: `cl` was an undefined name here; the client that
             # created the data set is the one that should upload to it.
             self.ctn_client.upload_file(json_file,data_set_id = dsid)
             self.return_codes[p.uid]=1
             # delete dataset json
             os.remove(json_file)
         except Exception:
             # narrowed from a bare `except:` so KeyboardInterrupt and
             # SystemExit still propagate
             # TODO: Pass along some return code from the server?
             self.return_codes[p.uid]=-1
def make_pif(filename):
    """
    Turn a CSV file into a PIF for this BMG-based dataset.

    Extracts formula, DFT formation energy, Tg, Tl and Tx columns and
    builds one ChemicalSystem per row.

    :param filename: Path to the CSV file you'd like to convert to PIF.
    :type filename: str
    :return: Path to the JSON output from converting to PIF
    :rtype: str
    """
    # Read in csv file using pandas to make df
    new_data = pd.read_csv(filename)
    # Pull out the desired properties and turn them to lists.
    form = new_data['formula']
    energy = new_data['PROPERTY: Nearest DFT Formation Energy (eV)']
    tg = new_data['PROPERTY: Tg (K)']
    tl = new_data['PROPERTY: Tl (K)']
    tx = new_data['PROPERTY: Tx (K)']

    # Renamed from `input`, which shadowed the builtin of the same name.
    systems = []

    # Make pifs -- iterate the columns in lockstep instead of indexing
    for formula, e_val, tg_val, tl_val, tx_val in zip(form, energy, tg, tl, tx):
        chemical_system = ChemicalSystem()
        chemical_system.chemical_formula = formula
        dft_energy = Property(name = 'Nearest DFT Formation Energy', units = 'eV', scalars = float(e_val))
        tg_prop = Property(name = 'Tg', units = 'K', scalars = float(tg_val))
        tl_prop = Property(name = 'Tl', units = 'K', scalars = float(tl_val))
        tx_prop = Property(name = 'Tx', units = 'K', scalars = float(tx_val))
        chemical_system.properties = [dft_energy, tg_prop, tl_prop, tx_prop]
        systems.append(chemical_system)

    # Dictionary to PIF
    # Write the string that was dumped to a json
    outfile = "pif.json"
    with open(outfile, 'w') as fp:
        pif.dump(systems, fp)

    return outfile
Ejemplo n.º 18
0
def _handle_pif(path, ingest_name, convert_args, enrich_args, ingest_manager):
    """Ingest and enrich pifs from a path, returning affected paths"""
    # Convert the raw input via the named ingest extension
    pifs = ingest_manager.run_extension(ingest_name, path, convert_args)

    # Attach the requested enrichment metadata
    for enricher, key in ((add_tags, 'tags'),
                          (add_license, 'license'),
                          (add_contact, 'contact')):
        enricher(pifs, enrich_args[key])

    # A file input gets a sibling "<path>_pif.json"; a directory input
    # gets a "pif.json" placed inside it.
    if os.path.isfile(path):
        pif_name = "{}_{}".format(path, "pif.json")
        affected = [path, pif_name]
    else:
        pif_name = os.path.join(path, "pif.json")
        affected = [path]

    with open(pif_name, "w") as f:
        pif.dump(pifs, f, indent=2)
    logging.info("Created pif at {}".format(pif_name))

    return affected
Ejemplo n.º 19
0
    if file_type[2] == "tif":
        jpeg_path = convert_tif_to_jpeg(image_path)
        image_to_pif(jpeg_path)
        return

    my_pif = ChemicalSystem()
    my_pif.ids = [os.path.basename(image_path).split("_")[0]]
    my_pif.names = [os.path.basename(image_path).rpartition(".")[0]]
    my_pif.properties = [
        Property(name="SEM",
                 files=FileReference(mime_type="image/" + file_type[2],
                                     relative_path=image_path))
    ]

    return [my_pif]


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('images',
                        nargs='*',
                        help='path to SEM images (.tif, .jpeg, .bmp)')

    args = parser.parse_args()

    # Convert each image to a pif and write it next to the source file.
    for f in args.images:
        system = image_to_pif(f)
        f_out = f.rpartition(".")[0] + ".json"
        print(f_out)
        # context manager closes the output file (the original leaked it)
        with open(f_out, "w") as ofs:
            pif.dump(system, ofs, indent=4)
Ejemplo n.º 20
0
        # Even if one of the numbers is not convertible to float, skip line
        try:
            floats = [float(num) for num in x_y_e]
        except ValueError:
            continue

        x.append(Scalar(value=x_y_e[0]))
        y.append(Scalar(value=x_y_e[1], uncertainty=x_y_e[2]))

    intensity = Property(name="Intensity", scalars=y,
                         conditions=[Value(name="2$\\theta$", scalars=x, units="degrees")],
                         methods=[Method(instruments=
                                         Instrument(name="11-BM", url="http://11bm.xray.aps.anl.gov/"))]
                         )
    chem_sys.properties.append(intensity)

    return chem_sys


if __name__ == "__main__":
    # Alternative inputs kept for reference:
    # file_name = "LuFe2O4_700Air_hold3-00059.xye"
    # file_name = "NOM_LuFe2O4_Ex_situ_20C-5.xye"
    # file_name = "PG3_27954-3.xye"
    file_name = "11bmb_2144_AA0037_YbFeO_red.xye"
    chem_system = convert(files=["../test_files/" + file_name], sample_id="001",
                          chemical_formula="NaCl")

    # Write the converted system next to the source, swapping the extension.
    out_path = '../test_files/' + file_name.replace('.xye', '.json')
    with open(out_path, 'w') as fw:
        pif.dump(chem_system, fw, indent=4)
Ejemplo n.º 21
0
def to_pif(dataframe, filename):
    """
    Constructs a PIF-formatted data file appropriately formatted for import
    into the citrination platform.

    Several columns are treated as special:

        Sample Name --> uid
        Parent Sample Name --> subsystem.uid
        Contact --> contacts

    :param dataframe: pandas.DataFrame containing the tabulated information
        collected from previous steps.
    :param filename: path of the JSON file the PIF records are written to.
    :return: None
    """
    from pypif import pif

    def to_system(row):
        """Build one pif.System from a single DataFrame row."""
        def url_friendly(name):
            # uids must be URL friendly: collapse non-word chars to '_'
            return re.sub(r"\W", "_", name)

        def wolf_contact():
            return pif.Person(name='JP H. Paul',
                              email='*****@*****.**',
                              tags='Lincoln Electric (Wolf Robotics)')

        def cmu_contact():
            return pif.Person(name='Anthony Rollett',
                              email='*****@*****.**',
                              tags='Carnegie Mellon University')

        def mines_contact():
            return pif.Person(name='Branden Kappes',
                              email='*****@*****.**',
                              tags='Colorado School of Mines')

        def lmco_contact():
            return pif.Person(name='Edward A. Pierson',
                              email='*****@*****.**',
                              tags='Lockheed Martin Corporation')

        # Every PIF record is a System object
        system = pif.System()
        # create a unique identifier. This must be URL friendly.
        system.uid = url_friendly(str(row['Sample Name']))
        # name the PIF
        system.names = str(row['Sample Name'])
        # record the parent sample name
        system.sub_systems = pif.System(uid=url_friendly(
            str(row['Parent Sample Name'])),
                                        names=str(row['Sample Name']))
        # set the contact information. By default, I set this as LMCO.
        system.contacts = {
            'Wolf': wolf_contact,
            'CMU': cmu_contact,
            'Mines': mines_contact,
            'LM': lmco_contact
        }.get(row['Contact'], lmco_contact)()

        # Certain fields we treat as special
        special = ['Sample Name', 'Parent Sample Name', 'Contact']
        filelist = None
        for column, value in row.items():
            # special columns have already been handled.
            if column in special:
                continue
            # no need to record empty record.
            if is_null(value):
                continue
            # handle FILEs.
            if column.startswith('FILE'):
                # add the FILE column to the list of special columns
                # so we don't add this as a property.
                special.append(column)
                # split on colon -- we don't need "FILE" in the name
                split = column.split(':')
                # Convert the filename (or the string-representation of
                # a list of filenames, e.g. [file1.png, file2.png]) to
                # a list of files
                value = str(value).strip("[]").split(",")
                # create a file reference for each file and store these as
                # a list of FileReference objects.
                for path in value:
                    file = pif.FileReference(relative_path=str(path),
                                             tags=':'.join(split[1:]).strip())
                    try:
                        filelist.append(file)
                    except AttributeError:
                        filelist = [file]
        # create a special property that holds all the files
        pty = pif.Property(name="Files", files=filelist)
        try:
            system.properties.append(pty)
        except AttributeError:
            # if this is the first property, append to a None value will
            # fail. Handle this edge case.
            system.properties = [pty]
        # everything else is a property
        for column, value in row.items():
            # special columns have already been handled.
            if column in special:
                continue
            # ignore this value if empty
            if is_null(value):
                continue
            # scalar can only contain lists, dict, string, or Number
            value = {
                pd.Timestamp: str,
                tuple: list
            }.get(type(value), type(value))(value)
            # otherwise, construct a property value.
            try:
                pty = pif.Property(name=column, scalars=value)
            except Exception:
                # narrowed from a bare `except:`; fall back to the string
                # representation when pypif rejects the raw value.
                pty = pif.Property(name=column, scalars=str(value))
            # units are stored, by convention in parentheses, e.g.
            # laser speed (mm/s). This regular expression extracts the the
            # last term surrounded by parentheses.
            try:
                # raw string fixes the invalid escape sequences \( \) \s
                pty.units = re.search(r'.*\(([^)]+)\)\s*$', column).group(1)
            except AttributeError:
                # no units were found...
                pass
            try:
                # add the property
                system.properties.append(pty)
            except AttributeError:
                # if this is the first property, append to a None value will
                # fail. Handle this edge case.
                system.properties = [pty]
        # done
        return system

    dataframe = dataframe.fillna('')
    records = [to_system(row) for i, row in dataframe.iterrows()]

    with open(filename, 'w') as ofs:
        pif.dump(records, ofs)
            else:
                raise IOError(
                    'Filetype provided is not compatible with this parser. Please upload a .csv or .tsv file.\n'
                )

            if not _check_table_size(table, (100 * 100000 + 1)):
                continue

            input_file.seek(0)

            for i, row in enumerate(table):

                if not any(row):
                    continue

                if i == 0:
                    headers = row
                else:
                    row_pif = create_pif(headers, row)

                    yield row_pif


if __name__ == '__main__':
    result = convert(files=[sys.argv[1]])

    # Replace the source extension with '-pif.json' for the output name.
    source = sys.argv[1]
    extension = source.rpartition('.')[-1]
    out_path = source.replace('.{}'.format(extension), '-pif.json')
    with open(out_path, 'w') as output_file:
        pif.dump(list(result), output_file, indent=2)
Ejemplo n.º 23
0
from pypif import pif
from quality_made_xlsx.read_excel import read_excel
from quality_made_xlsx.process_archive import process_archive


def convert(files=[]):
    """
    Convert files into a pif

    :param files: list of file paths; only the first entry is read
    :return: the pif produced by reading the first file as an Excel workbook
    """
    # The docstring previously advertised parameters that do not exist
    # (important_argument, whatever_argument, do_some_extra_thing, kwargs).
    # NOTE: the mutable default is harmless because `files` is never
    # mutated, but callers should still pass their own list.
    return read_excel(files[0])


if __name__ == '__main__':
    import sys
    # Serialize the converted workbook next to the source file.
    out_path = sys.argv[1].replace('.xlsx', '-pif.json')
    with open(out_path, 'w') as ofs:
        pif.dump(convert(files=[sys.argv[1]]), ofs, indent=4)
Ejemplo n.º 24
0
                                     spectra_basename +
                                     file_index(scan_num[i]) + '_Qchi.png')),
        Property(name='XRD Intensity',
                 scalars=IntAve,
                 conditions=[
                     Value(name='Q, (Angstrom$^{-1}$)', scalars=Qlist),
                     Value(name='Temperature',
                           scalars='25',
                           units='$^\\circ$C'),
                     Value(name='Exposure time', scalars='30', units='seconds')
                 ],
                 methods=Method(instruments=(Instrument(
                     name='MARCCD, 2048 pixels x 2048 pixels, 79 microns')))),
        Property(name='Maximum intensity/average intensity',
                 scalars=round(np.nanmax(IntAve) / np.nanmean(IntAve), 2)),
        Property(name='Full width half maximum (FWHM) of FSDP',
                 scalars=round(peak_width[i], 2)),
        Property(name='First sharp diffraction peak (FSDP) position',
                 scalars=round(peak_position[i], 2)),
        Property(name='Textured', scalars=0)
    ]

    # specify a unique uid for each sample
    alloy.uid = 'Fe' + str(int(Fe[i])) + 'Ti' + str(int(Ti[i])) + 'Nb' + str(
        int(Nb[i])) + 'low_power' + str(scan_num[i])

    # print pif.dumps(alloy)
    alloys += [alloy]

pif.dump(alloys, open('..//..//data//Json_files_Citrine//SampleB2_21.json',
                      'w'))
Ejemplo n.º 25
0
def test_converter(generate_output):
    """Run the ASTM converter; regenerate the reference JSON on request."""
    converted = astm_converter([STRAIN, STRESS])
    if generate_output:
        target = '{}/data/astm-mark10-aramis.json'.format(HERE)
        with open(target, 'w') as ofs:
            pif.dump(converted, ofs)
Ejemplo n.º 26
0
def converter(*args):
    """
    Ingest .xyz files
    Args (one of these is required):
        listd: directory full path and a comma separated list of filenames
            to process in that directory
        alld: directory full path
        files: comma separated list of filenames with their full paths
    Returns: a pif chemical system (JSON-encoded text file) from each ingested .xyz file
    """
    parser = argparse.ArgumentParser(description='convert xyz file(s) to pif')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '-l',
        '--listd',
        help=
        '2 arguments: the source directory full path and a comma separated list of filenames to process in that directory',
        nargs=2)
    group.add_argument(
        '-a',
        '--alld',
        help=
        '1 argument: the source directory full path path where all files of .xyz type reside',
        nargs=1)
    group.add_argument(
        '-f',
        '--files',
        help=
        '1 argument: a comma separated list of filenames with their full paths to process',
        nargs=1)

    args = parser.parse_args()

    def _dump_pif(full_path, basename):
        """Parse one .xyz file and write its pif JSON into DEST_DIR."""
        parsed = parser_xyz(full_path)
        # BUGFIX: the original concatenated DEST_DIR + basename without a
        # separator; os.path.join matches the cleanup loop's intent.
        out_name = os.path.join(DEST_DIR, basename.replace('.xyz', '.json'))
        with open(out_name, 'w') as fw:
            pif.dump(parsed, fw, indent=4)

    # check if destination directory exists and if not create one
    if not os.path.exists(DEST_DIR):
        os.makedirs(DEST_DIR)

    # clean destination directory
    for file_name in os.listdir(DEST_DIR):
        os.remove(os.path.join(DEST_DIR, file_name))

    # Depending on the command line input, process data and create a PIF file
    #
    # Below is the case when the user specifies a directory and a list of
    # files to be processed from that directory
    if args.listd:
        data_directory = args.listd[0]
        for name in (str(item) for item in args.listd[1].split(',')):
            if check(name):
                continue
            _dump_pif(data_directory + '/' + name, name)

    # Below is the case when the user specifies a directory and all files
    # from that directory need to be processed
    elif args.alld:
        data_directory = str(args.alld[0])
        for name in os.listdir(data_directory):
            full_path = data_directory + '/' + name
            # NOTE(review): check()'s result is ignored here but honored in
            # the --listd branch -- confirm that asymmetry is intended.
            check(str(full_path))
            _dump_pif(str(full_path), name)

    # Below is the case when the user specifies a list of distinct files
    # with their full path that need to be processed
    elif args.files:
        for full_path in (str(item) for item in args.files[0].split(',')):
            check(full_path)
            _dump_pif(full_path, full_path.split('/')[-1])
    else:
        print("Nothing entered")
Ejemplo n.º 27
0
        return my_pif

    except IOError as e:
        print(e)
        print("RAW V4 FILE NOT PARSED")
        return None


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('files', nargs='*', help='path to XRD files (.raw, .txt)')

    args = parser.parse_args()

    for f in args.files:

        if ".txt" in f:
            print ("PARSING: {}".format(f))
            pifs = raw4_txt_to_pif(f)
            f_out = f.replace(".txt", ".json")
            print ("OUTPUT: {}".format(f_out))
            # context managers close the output files (the originals
            # passed open() handles to pif.dump and leaked them)
            with open(f_out, "w") as ofs:
                pif.dump(pifs, ofs, indent=4)

        if ".raw" in f:
            print ("PARSING: {}".format(f))
            pifs = raw_to_pif(f)
            if pifs:
                f_out = f.replace(".raw", ".json")
                print ("OUTPUT: {}".format(f_out))
                with open(f_out, "w") as ofs:
                    pif.dump(pifs, ofs, indent=4)
            print("Defect curves: ", v)

            if len(v) >= 2:
                intersection_points = calculate_intersect_points(v)
                print("INTERSECTION POINTS: ", intersection_points)
                low_energy_line = find_min_energy_overlap(v, intersection_points)
                print("LOWEST ENERGY LINE: ", low_energy_line)
                system.properties.append(Property(name="$\Delta$H_2", scalars=low_energy_line[0], conditions=[Value(name="E$_F$_2", scalars=low_energy_line[1])]))
            else:
                print("LOWEST ENERGY LINE: ", [[v[0][0][0], v[0][1][0]], [v[0][0][1], v[0][1][1]]])
                system.properties.append(Property(name="$\Delta$H_2", scalars=[v[0][0][1], v[0][1][1]], conditions=[Value(name="E$_F$_2", scalars=[v[0][0][0], v[0][1][0]])]))

        systems.append(system)
        print("=====")

    return systems


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("csv", nargs="*", help="path to template file")

    args = parser.parse_args()

    for f in args.csv:
        pifs = parse_template(f)
        outfile = f.replace(".csv", ".json")
        # context manager flushes and closes the JSON file (the original
        # passed an open() handle to pif.dump and leaked it)
        with open(outfile, "w") as ofs:
            pif.dump(pifs, ofs)
        print("PIF DUMPED: ", outfile)

Ejemplo n.º 29
0
import sys
from pypif import pif
from .parse_cif_pmg import parse_cif


def convert(files=(), **kwargs):
    """
    Convert CIF files into a list of pif systems.

    :param files: sized iterable of CIF file paths to convert
    :param kwargs: any other arguments (accepted but unused)
    :return: list of pif systems produced by this conversion
             (files that fail to parse are silently dropped)
    """
    # Immutable tuple default instead of the mutable-list-default pitfall
    # ([] is shared across calls); callers passing a list are unaffected.
    print('Converting {} CIFs'.format(len(files)))
    # Keep only files that actually parsed to a truthy pif.
    return [converted for converted in (parse_cif(f) for f in files) if converted]


if __name__ == '__main__':
    # Derive the output path from the input CIF name, then serialize the
    # converted pif systems as pretty-printed JSON.
    source_path = sys.argv[1]
    target_path = source_path.replace('.cif', '-pif.json')
    with open(target_path, 'w') as output_file:
        pif.dump(convert(files=[source_path]), output_file, indent=4)
Ejemplo n.º 30
0
def harmonic_max_json(xyz_panda):
    """
    Build a pif System from a parsed xyz DataFrame and dump it to JSON.

    Assumed row layout (QM9-style xyz parsed into a DataFrame — TODO confirm
    against the upstream parser):
      row 0              -> number of atoms
      row 1              -> database tag/id, then 15 scalar properties in cols 1..15
      rows 2..n_atom+1   -> element symbol, x, y, z, Mulliken charge
      row n_atom+2       -> harmonic vibrational frequencies (variable length)
      row n_atom+3       -> SMILES string
      row n_atom+4       -> InChI key
    The output file is named "<stripped tag>.json".
    """
    my_pif = System()
    file_stem = str(xyz_panda.loc[1, 0]).strip(" ")
    n_atom = int(xyz_panda.loc[0, 0])
    smiles_row = n_atom + 3
    inchi_row = n_atom + 4
    harmony_row = n_atom + 2

    # Harmonic vibrational frequencies: one row, NaN padding dropped.
    harmony_line = xyz_panda.loc[harmony_row, :].dropna().values.tolist()
    harmonic_max = [harmony_line]

    # Atomic coordinates + Mulliken charges, one 5-entry row per atom.
    atom_line = [
        [xyz_panda.loc[i, 0], xyz_panda.loc[i, 1], xyz_panda.loc[i, 2],
         xyz_panda.loc[i, 3], xyz_panda.loc[i, 4]]
        for i in range(2, n_atom + 2)
    ]

    # The 15 scalar computed properties on row 1: (name, column, units),
    # in the same order as the original hand-written Property blocks.
    scalar_specs = [
        ("Rotational Constant A", 1, "GHz"),
        ("Rotational Constant B", 2, "GHz"),
        # Was "GHZ" — normalized to "GHz" to match constants A and B.
        ("Rotational Constant C", 3, "GHz"),
        ("Dipole Moment", 4, "Debye"),
        ("Isotropic polarizability", 5, "Å^3"),
        ("Energy of H**O", 6, "Ha"),
        ("Energy of LUMO", 7, "Ha"),
        ("Band Gap", 8, "Ha"),
        # NOTE(review): name typo ("spactial") and "Ha" units kept verbatim
        # for data compatibility with previously generated records.
        ("Electronic spactial extent", 9, "Ha"),
        ("Zero point vibrational energy", 10, "Ha"),
        ("Internal energy at 0K", 11, "Ha"),
        ("Internal energy at 298 K", 12, "Ha"),
        ("Enthalpy at 298 K", 13, "Ha"),
        ("Free energy at 298 K", 14, "Ha"),
        ("Heat capacity at 298 K", 15, "Ha"),
    ]
    scalar_props = [
        Property(name=name,
                 scalars=xyz_panda.loc[1, col],
                 units=units,
                 data_type="COMPUTATIONAL")
        for name, col, units in scalar_specs
    ]

    # Identifier properties (no units / data_type in the original).
    identity_props = [
        Property(name="(SMILES) Chemical Identifier",
                 scalars=xyz_panda.loc[smiles_row, 0]),
        Property(name="InChI Key Identifier",
                 scalars=xyz_panda.loc[inchi_row, 0]),
        Property(name="Number of Atoms", scalars=xyz_panda.loc[0, 0]),
        Property(name="Database Tag + Id", scalars=xyz_panda.loc[1, 0]),
    ]

    harmonic_prop = Property(name="Harmonic Vibrational Frequencies",
                             matrices=harmonic_max,
                             units="cm^-1",
                             data_type="COMPUTATIONAL")
    coords_prop = Property(
        name="Atom Coordinates for (Atom, Position Vector, Mulliken Charge)",
        matrices=atom_line,
        units="Chemical, (x,y,z), e",
        data_type="COMPUTATIONAL")

    ref = Reference(
        title=
        "Quantum chemistry structures and properties of 134 kilo molecules.",
        publisher="Nature",
        url="https://www.nature.com/articles/sdata201422",
        authors="R. Ramakrishnan, Dral P. O., Rupp, M.")

    # Preserve the original ordering: identifiers, the 15 scalars,
    # harmonic frequencies, then atomic coordinates.
    my_pif.properties = (identity_props + scalar_props
                         + [harmonic_prop, coords_prop])
    my_pif.resources = [ref]

    # Output file named after the (stripped) tag on line 2 of the xyz file.
    with open(file_stem + ".json", "w") as outfile:
        pif.dump(my_pif, outfile)