Esempio n. 1
0
 def test_misc(self):
     """Check that the CIF file serialises to the stored reference JSON.

     Reads ``<basename>.cif``, converts it to JSON, and compares the decoded
     result with the decoded contents of ``<basename>.json``.
     """
     produced = json.loads(cif.read_file(self.basename + '.cif').as_json())
     with io.open(self.basename + '.json', encoding='utf-8') as reference_file:
         expected = json.load(reference_file)
     self.assertEqual(produced, expected)
Esempio n. 2
0
 def import_equipment_from_file(self, filename='') -> None:
     """
     Import an equipment entry from a cif file.

     When no filename is passed, one is requested through
     cif_file_open_dialog(). Every key/value pair of the file's single data
     block becomes a ``[key, value]`` row; for '.cif_od' files only keys listed
     in include_equipment_imports are kept. The rows are saved in the
     'equipment' settings list and the equipment view is refreshed.

     Files that cannot be read, or that do not contain exactly one data
     block, produce a warning dialog instead of an exception.
     """
     if not filename:
         filename = cif_file_open_dialog(
             filter="CIF file (*.cif  *.cif_od *.cfx)")
     if not filename:
         print('No file given')
         return
     try:
         doc = cif.read_file(filename)
         # sole_block() is inside the try because it raises when the document
         # does not hold exactly one block. IndexError is caught as well in
         # case an empty document surfaces as an out-of-range access.
         # NOTE(review): confirm the exception types against the gemmi
         # version in use.
         block = doc.sole_block()
     except (RuntimeError, IndexError) as e:
         show_general_warning(str(e))
         return
     # The file type does not change while iterating, so test it only once.
     is_cif_od = filename.endswith('.cif_od')
     table_data = []
     for item in block:
         if item.pair is None:
             # Loops and other non-pair items are not equipment entries.
             continue
         key, value = item.pair
         if is_cif_od and key not in include_equipment_imports:
             continue
         table_data.append([
             key,
             retranslate_delimiter(cif.as_string(value).strip('\n\r ;')),
         ])
     if is_cif_od:
         # For .cif_od files the entry is named after the file itself.
         name = Path(filename).stem
     else:
         name = block.name.replace('__', ' ')
     self.settings.save_settings_list('equipment', name, table_data)
     self.show_equipment()
def parse_star_selected_columns(file_path, col1_name, col2_name):
    """Read two particle columns (plus optics metadata) from a Relion star file.

    A document with exactly two blocks is treated as a Relion 3.1+ file
    (optics block first, particles block second); anything else is treated as
    a Relion 3.0 file whose first block holds the particles.

    Args:
        file_path: Path of the star file to read.
        col1_name: Tag of the first particle column to extract.
        col2_name: Tag of the second particle column to extract.

    Returns:
        A tuple ``(optics_data, particles_data)``. ``optics_data`` maps each
        optics tag to the value in the first row of its column (empty for 3.0
        files); ``particles_data`` is a DataFrame with the two requested
        columns.
    """
    doc = cif.read_file(file_path)

    optics_data = {}

    # 3.1 star files have two data blocks: optics and particles.
    if len(doc) == 2:
        print('Found Relion 3.1+ star file.')

        optics = doc[0]
        particles = doc[1]

        for item in optics:
            # Non-loop items (e.g. key/value pairs) have item.loop set to
            # None; skip them instead of failing with AttributeError.
            if item.loop is None:
                continue
            for optics_metadata in item.loop.tags:
                value = optics.find_loop(optics_metadata)
                # Only the first row of each optics column is kept.
                optics_data[optics_metadata] = np.array(value)[0]

    else:
        print('Found Relion 3.0 star file.')
        particles = doc[0]

    particles_data = pd.DataFrame()

    print('Reading star file:')

    for particle_metadata in (col1_name, col2_name):
        loop = particles.find_loop(particle_metadata)
        particles_data[particle_metadata] = np.array(loop)

    return optics_data, particles_data
Esempio n. 4
0
def parse_star_model(file_path, loop_name):
    """Return one column of the second block of a star file as a NumPy array.

    Args:
        file_path: Path of the star file to read.
        loop_name: Tag of the loop column to extract.
    """
    model_doc = cif.read_file(file_path)
    # The block at index 1 carries the per-class information.
    per_class_block = model_doc[1]
    return np.array(per_class_block.find_loop(loop_name))
Esempio n. 5
0
def cif_to_dict(cif_file: str, mmjson: bool = False) -> tp.Generator:
    """Yield dictionaries built from the JSON form of a cif file.

    With ``mmjson=True`` the whole decoded document is yielded once.
    Otherwise one dictionary is yielded per data block, extended with a
    ``'name'`` entry (the block name) and a ``'cif_file'`` entry (the absolute
    path of the source file).
    """
    source = Path(cif_file)
    document = cif.read_file(str(source))
    decoded: dict = json.loads(document.as_json(mmjson=mmjson))
    if mmjson:
        yield decoded
        return
    for name, block in decoded.items():
        block['name'] = name
        block['cif_file'] = str(source.absolute())
        yield block
 def parse_mmcif(self, fileName):
     """Load the mmCIF file ``fileName`` into ``self.cifObj``.

     Returns True when the file exists, is read without an exception and the
     resulting document is truthy; otherwise returns False. Any exception
     raised while reading is logged, not propagated.
     """
     # from http://gemmi.readthedocs.io/en/latest/cif-parser.html#python-module
     if not fileName or not os.path.exists(fileName):
         return False
     try:
         # Copy all the data from the mmCIF file.
         self.cifObj = cif.read_file(fileName)
         loaded = bool(self.cifObj)
     except Exception as e:
         logging.error(e)
         return False
     return loaded
Esempio n. 7
0
 def parse_mmcif(self):
     """Load the mmCIF file ``self.f`` into ``self.cifObj``.

     After a successful read, ``self.getDataBlockWithAtomSite()`` is called.
     Returns True when the file exists, the document is truthy and no
     exception occurred; otherwise returns False. Exceptions are logged, not
     propagated.
     """
     # from http://gemmi.readthedocs.io/en/latest/cif-parser.html#python-module
     if not (self.f and os.path.exists(self.f)):
         return False
     try:
         # Copy all the data from the mmCIF file.
         self.cifObj = cif.read_file(self.f)
         if not self.cifObj:
             return False
         # Alternatives kept from the original code: getDataBlockWithMostCat(),
         # getDatablock().
         self.getDataBlockWithAtomSite()
     except Exception as e:
         logging.error(e)
         return False
     return True
Esempio n. 8
0
 def import_author(self, filename=''):
     """
     Import an author from a cif file.

     When no filename is passed, one is requested through
     cif_file_open_dialog(). Recognised ``_publ_contact_author_*`` and
     ``_publ_author_*`` pairs of the file's single data block are collected
     into a dictionary, which is passed to general_author_save() provided it
     contains a non-empty 'name'. The author is flagged as contact author when
     the block name (with '__' replaced by spaces) contains 'contact author'.

     Files that cannot be read, or that do not contain exactly one data
     block, produce a warning dialog instead of an exception.
     """
     cif_auth_to_str = {
         '_publ_contact_author_name': 'name',
         '_publ_contact_author_address': 'address',
         '_publ_contact_author_email': 'email',
         '_publ_contact_author_phone': 'phone',
         '_publ_contact_author_id_orcid': 'orcid',
         #
         '_publ_author_name': 'name',
         '_publ_author_address': 'address',
         '_publ_author_email': 'email',
         '_publ_author_phone': 'phone',
         '_publ_author_id_orcid': 'orcid',
         '_publ_author_footnote': 'footnote',
     }
     if not filename:
         filename = cif_file_open_dialog(filter="CIF file (*.cif)")
     if not filename:
         return
     try:
         doc = read_file(filename)
         # sole_block() is inside the try because it raises when the document
         # does not hold exactly one block. IndexError is caught as well in
         # case an empty document surfaces as an out-of-range access.
         # NOTE(review): confirm the exception types against the gemmi
         # version in use.
         block = doc.sole_block()
     except (RuntimeError, IndexError) as e:
         show_general_warning(str(e))
         return
     table_data = {}
     for item in block:
         if item.pair is None:
             continue
         key, value = item.pair
         # A single lookup both filters unknown keys and translates known ones.
         field = cif_auth_to_str.get(key)
         if field is None:
             continue
         table_data[field] = retranslate_delimiter(
             as_string(value).strip('\n\r ;'))
     name = block.name.replace('__', ' ')
     if 'contact author' in name:
         table_data['contact'] = True
     if not table_data.get('name'):
         # Nothing is saved for an author without a name.
         return None
     self.general_author_save(table_data)
     self.show_author_loops()
Esempio n. 9
0
 def import_property_from_file(self, filename: str = '') -> None:
     """
     Imports a cif file as entry of the property templates list.

     When no filename is passed, one is requested through
     cif_file_open_dialog(). The first column of every loop in the file's
     single data block supplies the template values; the first tag of the
     last loop that has tags is used as the cif keyword. The block name (with
     '__' replaced by spaces) is added to the stored 'property_list', and the
     keyword plus values are stored under 'property/<block name>'.

     Files that cannot be read, or that do not contain exactly one data
     block, produce a warning dialog instead of an exception.
     """
     if not filename:
         filename = cif_file_open_dialog(filter="CIF file (*.cif)")
     if not filename:
         return
     try:
         doc = cif.read_file(filename)
         # sole_block() is inside the try because it raises when the document
         # does not hold exactly one block. IndexError is caught as well in
         # case an empty document surfaces as an out-of-range access.
         # NOTE(review): confirm the exception types against the gemmi
         # version in use.
         block = doc.sole_block()
     except (RuntimeError, IndexError) as e:
         show_general_warning(str(e))
         return
     property_list = self.settings.settings.value('property_list')
     if not property_list:
         property_list = ['']
     template_list = []
     loop_column_name = ''
     for item in block:
         loop = item.loop
         if loop is None:
             continue
         if len(loop.tags) > 0:
             loop_column_name = loop.tags[0]
         for row in range(loop.length()):
             value = loop.val(row, 0)
             template_list.append(
                 retranslate_delimiter(
                     cif.as_string(value).strip("\n\r ;")))
     block_name = block.name.replace('__', ' ')
     # This is the list shown in the Main menu:
     property_list.append(block_name)
     table = self.app.ui.PropertiesEditTableWidget
     table.setRowCount(0)
     self.app.ui.cifKeywordLineEdit.setText(loop_column_name)
     # Unique, non-empty names in sorted order.
     newlist = sorted({x for x in property_list if x})
     # this list keeps track of the property items:
     self.settings.save_template_list('property_list', newlist)
     # Put the empty entry first and drop duplicates while keeping file order.
     # Going through set() here would scramble the order and lose the leading
     # empty entry.
     template_list = list(dict.fromkeys([''] + template_list))
     # save as dictionary for properties to have "_cif_key : itemlist"
     # for a table item as dropdown menu in the main table.
     table_data = [loop_column_name, template_list]
     self.settings.save_template_list('property/' + block_name, table_data)
     self.show_properties()
Esempio n. 10
0
def parse_star_data(file_path, loop_name, retry_delay=5, max_retries=None):
    """Read one loop column from a star file, retrying while it is unreadable.

    A document with exactly two blocks is read from its second block;
    otherwise the first block is used.

    Args:
        file_path: Path of the star file to read.
        loop_name: Tag of the loop column to extract.
        retry_delay: Seconds to wait after a RuntimeError before trying again.
        max_retries: Maximum number of retries after the first failure.
            ``None`` (the default) retries indefinitely, which is the
            original behaviour.

    Returns:
        The column values as a NumPy array.

    Raises:
        RuntimeError: Re-raised from the last attempt once ``max_retries``
            retries have been used up.
    """
    failures = 0
    while True:
        try:
            doc = cif.read_file(file_path)

            # Two blocks: the requested loop is in the second block;
            # otherwise it is in the first.
            particles_block = 1 if len(doc) == 2 else 0

            loop = doc[particles_block].find_loop(loop_name)
            return np.array(loop)

        except RuntimeError:
            failures += 1
            if max_retries is not None and failures > max_retries:
                # Give up rather than spin forever on a permanently bad file.
                raise
            print('*star file is busy')
            time.sleep(retry_delay)
def parse_star(file_path):
    """Read all particle columns (plus optics metadata) from a Relion star file.

    A document with exactly two blocks is treated as a Relion 3.1+ file
    (optics block first, particles block second); anything else is treated as
    a Relion 3.0 file whose first block holds the particles.

    Args:
        file_path: Path of the star file to read.

    Returns:
        A tuple ``(optics_data, particles_data)``. ``optics_data`` maps each
        optics tag to the value in the first row of its column (empty for 3.0
        files); ``particles_data`` is a DataFrame with one column per tag
        found in the loops of the particles block.
    """
    import tqdm

    doc = cif.read_file(file_path)

    optics_data = {}

    # 3.1 star files have two data blocks: optics and particles.
    if len(doc) == 2:
        print('Found Relion 3.1+ star file.')

        optics = doc[0]
        particles = doc[1]

        for item in optics:
            # Non-loop items (e.g. key/value pairs) have item.loop set to
            # None; skip them instead of failing with AttributeError.
            if item.loop is None:
                continue
            for optics_metadata in item.loop.tags:
                value = optics.find_loop(optics_metadata)
                # Only the first row of each optics column is kept.
                optics_data[optics_metadata] = np.array(value)[0]

    else:
        print('Found Relion 3.0 star file.')
        particles = doc[0]

    particles_data = pd.DataFrame()

    print('Reading star file:')
    for item in particles:
        # Same guard as above: only loop items carry column tags.
        if item.loop is None:
            continue
        # tqdm only draws a progress bar; iterating item.loop.tags directly
        # is equivalent if tqdm is not wanted.
        for particle_metadata in tqdm.tqdm(item.loop.tags):
            loop = particles.find_loop(particle_metadata)
            particles_data[particle_metadata] = np.array(loop)

    return optics_data, particles_data
Esempio n. 12
0
#!/usr/bin/env python
import sys
from gemmi import cif

# Element symbols that have already been greeted, shared across all input files.
greeted = set()
for path in sys.argv[1:]:
    try:
        # Read the whole mmCIF file and take its only data block.
        block = cif.read_file(path).sole_block()
        for s in block.find_loop("_atom_site.type_symbol"):
            if s in greeted:
                continue
            print("Hello " + s)
            greeted.add(s)
    except Exception as e:
        # Any failure aborts the script with a non-zero exit status.
        print("Oops. %s" % e)
        sys.exit(1)
from gemmi import cif
import pandas as pd
import sys
import ntpath

if __name__ == '__main__':
    # The mmCIF file to process is the first command-line argument; its
    # single data block is used for everything below.
    filename = sys.argv[1]
    doc = cif.read_file(filename)
    block = doc.sole_block()

    # Map author chain IDs to sequences. One entity row may list several
    # comma-separated chain IDs; each receives the same sequence, with the
    # ';' and newline characters removed.
    chain_ids = block.find_values('_entity_poly.pdbx_strand_id')
    chain_seqs = block.find_values('_entity_poly.pdbx_seq_one_letter_code_can')
    chain_seq_map = {
        chain_id: seq.replace(';', '').replace('\n', '')
        for idx, seq in zip(chain_ids, chain_seqs)
        for chain_id in idx.split(',')
    }

    # Extract atom info: one list per _atom_site column, read in this order.
    (standard_res,
     standard_chain,
     standard_seq_pos,
     auth_res,
     auth_chain) = (
        list(block.find_values(tag))
        for tag in (
            '_atom_site.label_comp_id',
            '_atom_site.label_asym_id',
            '_atom_site.label_seq_id',
            '_atom_site.auth_comp_id',
            '_atom_site.auth_asym_id',
        )
    )
import numpy as np
from gemmi import cif

# Load the isotherm file and take its only data block.
aif = cif.read_file(
    'database/DUT-6/NK_DUT-6_LP_N2_114PKT (Raw Analysis Data).aif')
block = aif.sole_block()
# Each loop column is converted to a float array; adsorption columns come
# first, then desorption columns.
ads_press, ads_p0, ads_amount, des_press, des_p0, des_amount = (
    np.array(block.find_loop(tag), dtype=float)
    for tag in (
        '_adsorp_pressure',
        '_adsorp_p0',
        '_adsorp_amount',
        '_desorp_pressure',
        '_desorp_p0',
        '_desorp_amount',
    )
)

import matplotlib as mpl
import matplotlib.pyplot as plt

# Font type used for PDF output.
# NOTE(review): 42 presumably selects TrueType (Type 42) embedding as
# described in matplotlib's rcParams documentation; confirm.
mpl.rcParams['pdf.fonttype'] = 42

# Default font size for all text in the figure.
plt.rcParams.update({'font.size': 6})

# One row with two axes that share both their x and y axes.
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, sharex=True)

# Layout is tightened before the figure is resized to 3 x 2.2 inches;
# the order of these two calls is kept as written.
f.tight_layout()
f.set_size_inches(3, 2.2)

# Adsorption branch on ax2: amount against pressure divided by p0,
# drawn with a logarithmic x axis and filled blue circles.
ax2.semilogx(ads_press / ads_p0, ads_amount, 'o', color='tab:blue', ms=5)
ax2.plot(des_press / des_p0,
         des_amount,
         'o',
         markerfacecolor='white',
         color='tab:blue',
Esempio n. 15
0
import os

# Directory whose cif files are to be rescaled.
directory = '../data/data_86_FE_BG'
# directory = "data/sample"

# Scale factor applied to every cell length
# (e.g. 0.99 means each new length is 0.99 * original).
fraction = 0.99
# Output directory: the input directory name with the factor appended.
mod_dir = '{}_{}/'.format(directory, fraction)

if __name__ == '__main__':
    # Imported here so this script block brings its own dependency with it.
    import shutil

    # Create the output directory; exist_ok avoids the check-then-create race.
    os.makedirs(mod_dir, exist_ok=True)
    for entry in os.listdir(directory):
        source_path = os.path.join(directory, entry)
        target_path = os.path.join(mod_dir, entry)
        if entry.endswith(".cif"):
            print(entry)
            doc = cif.read_file(source_path)
            block = doc.sole_block()
            # Scale the three cell lengths by `fraction`; find_pair returns
            # the (tag, value) pair, so index 1 is the value text.
            for tag in ('_cell_length_a', '_cell_length_b', '_cell_length_c'):
                scaled = fraction * float(block.find_pair(tag)[1])
                block.set_pair(tag, str(scaled))
            # saving the modified cif file
            doc.write_file(target_path)
        elif os.path.isfile(source_path):
            # Copy every other regular file unchanged (material_id_hash.csv,
            # id_prop.csv etc.). shutil.copy replaces the shell 'cp' string,
            # which broke on names containing spaces or shell metacharacters
            # and was never waited on. Directories are skipped, matching
            # plain 'cp' without -r.
            shutil.copy(source_path, target_path)
Esempio n. 16
0
    def _initialize_blocks(self):
        """
        Parse the .star file at self.filepath and fill self.blocks.

        Each block of the file is stored under its name: a block made of
        key-value pairs becomes a dictionary of strings, a block holding a
        loop becomes a pandas DataFrame of strings. A block with neither is
        not stored.

        Raises StarFileError when a block mixes pairs with a loop, when a pair
        key is repeated within a block, or when a block name is already
        present in self.blocks.
        """
        logger.info(f"Parsing star file at: {self.filepath}")
        document = cif.read_file(self.filepath)
        # A gemmi Document iterates over Blocks; a Block iterates over Items.
        # An Item exposes .pair (a non-loop "_field value" entry) and .loop
        # (a regular loop_ structure); whichever is not None is present.
        for star_block in document:
            # GEMMI names a block declared as bare 'data_' '#'; we store it
            # under '' for consistency.
            if star_block.name == "#":
                star_block.name = ""

            # Our model of the .star file only allows a block to be pairs or
            # a loop, never both.
            saw_pair = False
            saw_loop = False
            pair_values = {}
            column_names = []
            rows = []

            for entry in star_block:
                if entry.pair is not None:
                    saw_pair = True
                    if saw_loop:
                        raise StarFileError(
                            "Blocks with multiple loops and/or pairs are not supported"
                        )
                    key, raw_value = entry.pair
                    if key in pair_values:
                        raise StarFileError(
                            f"Duplicate key in pair: {entry.pair[0]}")
                    # Kept as str because we do not want type conversion.
                    pair_values[key] = str(raw_value)
                if entry.loop is not None:
                    saw_loop = True
                    if saw_pair:
                        raise StarFileError(
                            "Blocks with multiple loops and/or pairs are not supported"
                        )
                    star_loop = entry.loop
                    column_names = star_loop.tags
                    n_columns = star_loop.width()
                    # Build a list of rows via the loop's .val(row, col) accessor.
                    rows = [
                        [star_loop.val(row, col) for col in range(n_columns)]
                        for row in range(star_loop.length())
                    ]

            if not (saw_pair or saw_loop):
                # Nothing to store for a block without pairs or a loop.
                continue

            # Enforce unique block names (keys of self.blocks).
            if star_block.name in self.blocks:
                raise StarFileError(
                    f"Attempted overwrite of existing data block: {star_block.name}"
                )
            if saw_pair:
                # A set of pairs is represented by a dictionary.
                self.blocks[star_block.name] = pair_values
            else:
                # dtype=str because we do not want type conversion.
                self.blocks[star_block.name] = pd.DataFrame(
                    rows, columns=column_names, dtype=str)