Exemple #1
0
    def __init__(self, file_path=None, compress=False, mode='w', preserve_order=False):
        """Set up the writer and, when a path is given, open the output handle.

        Parameters:
            file_path: non-empty ``str`` path of the output file, or ``None``.
                With ``None`` no file is opened and ``self._handle`` is left
                as ``None``.
            compress: when true, ``".gz"`` is appended to ``file_path`` unless
                it already ends with ``".gz"``.
            mode: mode string handed to ``openGzip``.
            preserve_order: stored as ``self.preserve_token_order``.

        Raises:
            TypeError: if ``file_path`` is neither ``None`` nor a non-empty
                ``str``.
        """
        self.compress = compress
        self.mode = 'wb' if (compress and mode == 'w') else mode
        self.preserve_token_order = preserve_order

        if file_path is None:
            self._handle = None
        elif file_path and isinstance(file_path, str):
            if compress and not file_path.endswith(".gz"):
                file_path += ".gz"
            # NOTE(review): the handle is opened with the caller's ``mode``,
            # not the adjusted ``self.mode`` computed above -- confirm which
            # one ``openGzip`` expects for compressed output.
            self._handle = openGzip(file_path, mode)
        else:
            # TypeError is a builtin; the former ``from exceptions import
            # TypeError`` only exists on Python 2 and raised ImportError on
            # Python 3 before this error could be reported.
            raise TypeError("file_path argument is not a string")

        self.verbose = False  # TODO: Not implemented
Exemple #2
0
    def _exportCifFile(self, file_path, token_ordering):
        """Tokenise the file at ``file_path`` and build a ``CifFile`` from it.

        The token stream is walked as a small state machine: data blocks and
        save frames select the current container, data names select the
        current category/item, and data tokens are either assigned directly
        to the current item or buffered as loop values and handed to
        ``self._processLoop`` once the loop ends.

        Parameters:
            file_path: path passed to ``openGzip``; when ``None`` nothing is
                read.
            token_ordering: forwarded to ``CifFile`` as
                ``preserve_token_order``.

        Returns:
            The populated ``CifFile``, or ``None`` when ``file_path`` is
            ``None``.

        Raises:
            BadStarTokenError: when the tokeniser yields a ``BAD_TOKEN``.
        """
        cf = None
        if file_path is not None:
            # The context manager closes the input handle even when parsing
            # raises; previously the handle was never closed.
            with openGzip(file_path, 'r') as cif_file:
                tokeniser = StarTokeniser()
                tokeniser.start_matching(cif_file)

                if cif_file:
                    cf = CifFile(file_path, preserve_token_order=token_ordering)
                db = None  # current data block
                sf = None  # current save frame
                cc = None  # current category
                ci = None  # current item
                loopItems = []
                loopValues = []
                loop_state = False        # inside a loop_ construct
                save_state = False        # inside a save frame
                loop_value_state = False  # at least one loop value was read

                # Keller tokenizer provides the following tokens:
                # "", "MULTILINE", "COMMENT", "GLOBAL", "SAVE_FRAME", "SAVE_FRAME_REF",
                # "LOOP_STOP", "DATA_BLOCK", "LOOP", "BAD_CONSTRUCT", "DATA_NAME", "SQUOTE_STRING",
                # "DQUOTE_STRING", "NULL", "UNKNOWN", "SQUARE_BRACKET", "STRING", "BAD_TOKEN"

                DATA_TOKENS = ["MULTILINE", "SQUOTE_STRING", "DQUOTE_STRING", "NULL", "UNKNOWN", "STRING"]
                # NB: Square bracket  types are not currently handled

                for tok in tokeniser:
                    token_type = tok.type_string
                    if token_type == 'BAD_TOKEN':
                        raise BadStarTokenError(tok)

                    if token_type == 'DATA_BLOCK':
                        # The block id is everything after the first underscore.
                        db = cf.setDataBlock(tok.value[tok.value.find('_')+1:])
                        loop_state = False
                        save_state = False

                    elif token_type == 'LOOP':
                        loop_value_state = False
                        loop_state = True
                        if loopValues:
                            # A new loop starts: flush the previous one.
                            self._processLoop(cc, loopItems, loopValues)
                            loopItems = []
                            loopValues = []

                    elif token_type == 'SAVE_FRAME':
                        # The same token type both opens and closes a frame.
                        if save_state:
                            save_state = False
                        else:
                            sf = db.setSaveFrame(tok.value[tok.value.find('_')+1:])
                            save_state = True
                        if loop_state:
                            loop_state = False
                            if loopValues:
                                self._processLoop(cc, loopItems, loopValues)
                                loopItems = []
                                loopValues = []

                    elif token_type == 'DATA_NAME':
                        [category_name, item_name] = tok.value.split('.')
                        if loop_value_state:
                            # A data name after loop values ends that loop.
                            loop_state = False
                            loop_value_state = False
                            if loopValues:
                                self._processLoop(cc, loopItems, loopValues)
                                loopItems = []
                                loopValues = []

                        if not save_state:
                            cc = db.setCategory(category_name)
                        else:
                            cc = sf.setCategory(category_name)

                        if loop_state:
                            loopItems.append(item_name)

                        ci = cc.setItem(item_name)

                    elif token_type in DATA_TOKENS:  # It's a data containing token
                        token_value = tok.value
                        if loop_state:
                            loopValues.append((token_value, token_type))
                            loop_value_state = True
                        else:
                            ci.setValue(token_value, token_type)

                # Flush a loop that was still open at end of input.
                if loopValues:
                    self._processLoop(cc, loopItems, loopValues)
                    loopItems = []
                    loopValues = []

        return cf
Exemple #3
0
    def _writeCifObj(self, cifObjIn, compress=False, mode='w'):
        """Write every data block of ``cifObjIn`` to ``self._handle``.

        When no handle is open yet, one is opened from ``cifObjIn.file_path``
        (with ``".gz"`` appended and ``gzip.open`` used when ``compress`` is
        true). If opening fails, the error is printed and the method returns
        without writing anything.

        For each data block the categories are written first, followed by
        each save frame with its own categories. The handle is flushed at the
        end but not closed.

        Parameters:
            cifObjIn: object exposing ``file_path`` and ``getDataBlocks()``.
            compress: open a gzip file when a handle has to be created here.
            mode: mode string used when a handle has to be created here.
        """
        if self._handle is None:
            try:
                if compress:
                    self._handle = gzip.open(cifObjIn.file_path + ".gz", mode)
                else:
                    self._handle = openGzip(cifObjIn.file_path, mode)
            except Exception as err:
                print("CifFileWriter error: %s" % str(err))
                print("Could not write mmCIF file (No output path/filename specified)")
                return

        write = self._handle.write

        def write_categories(container):
            # Shared by data blocks and save frames, which expose the same
            # getCategories() interface and are serialised identically.
            for category in container.getCategories():
                if not category.isTable:
                    # Key/value form: one padded tag and its value per line.
                    for item in category.getItems():
                        tag = self.CAT_ITM % (category.getId(), item.name)
                        tag = tag.ljust(category._maxTagLength + 8)
                        write(tag + item.getFormattedValue() + "\n")
                else:
                    # Loop form: the tag header first, then the value table.
                    write(self.LOOP)
                    table = []
                    for item in category.getItems():
                        tag = self.CAT_ITM % (category.getId(), item.name)
                        tag = tag.ljust(category._maxTagLength + 8)
                        write(tag + "\n")
                        table.append(item.getFormattedValue())
                    write(pretty_print(table, transpose=True))
                write('\n' + self.NEWLINE)

        for datablock in cifObjIn.getDataBlocks():
            write(self.DATABLOCK % datablock.getId())
            write_categories(datablock)

            for saveframe in datablock.getSaveFrames():
                write(self.SAVEFRAMESTART % saveframe.getId())
                write_categories(saveframe)
                write(self.SAVEFRAMEEND)
        self._handle.flush()
Exemple #4
0
    def _parseFile(self, file_path, ignoreCategories, preserve_token_order, onlyCategories):
        """Parse an mmCIF-like file into nested dictionaries.

        The file is read line by line. The result maps each data block
        heading to a dictionary of categories, and each category to a
        dictionary of items. Items read from a ``loop_`` hold a list of
        values; other items hold either the tokenised value list or, when
        that has a single element, the element itself.

        Save frames are not parsed: every line from the save-frame start up
        to a line equal to ``save_`` is skipped.

        Parameters:
            file_path: path handed to ``openGzip``.
            ignoreCategories: categories to leave out of the result (may be
                falsy to disable).
            preserve_token_order: when true an ``OrderedDict`` is used for
                every mapping, otherwise a plain ``dict``.
            onlyCategories: when truthy, only these categories are kept.

        Returns:
            The nested dictionary described above. When a ``KeyError`` or
            ``IOError`` is caught, the error is printed and the method
            falls through, returning ``None`` implicitly.

        Raises:
            MMCIFWrapperSyntaxError / mmCifSyntaxError: when the number of
            loop values is not a multiple of the number of loop items.
        """

        # Pick the mapping type used for every level of the result.
        if preserve_token_order:
            try:
                from collections import OrderedDict as _dict
            except ImportError:
                # fallback: try to use the ordereddict backport when using python 2.6
                try:
                    from ordereddict import OrderedDict as _dict
                except ImportError:
                    # backport not installed: use local OrderedDict
                    from mmCif.ordereddict import OrderedDict as _dict
        else:
            _dict = dict


        mmcif_like_file = _dict()
        data_block = _dict()
        # NOTE(review): save_block is assigned here but never used in this method.
        save_block = _dict()

        data_heading = ""
        line_num = 0  # only used for the error messages at the bottom
        try:
            with openGzip(file_path, 'r') as f1:
                table_names = []         # item names of the loop being read
                table_values = []        # values tokenised from the current line
                table_values_array = []  # flat list of all values of the current loop
                isLoop = False
                multiLineValue = False
                skipCategory = False
                for line in f1:
                    line_num+=1
                    if skipCategory:
                        # Fast-forward over an ignored loop category: first past
                        # the remaining header lines (data names), then past the
                        # value lines, stopping at the next data name, loop_,
                        # save_ or data_ line.
                        flag = False  # becomes True once a value line was seen
                        while line:
                            check = (line.strip().startswith('_') or
                                self.loopRE.match(line.strip()[:5]) or
                                self.saveRE.match(line.strip()[:5]) or
                                self.dataRE.match(line.strip()[:5]))
                            if flag:
                                if check:
                                    isLoop = False
                                    break
                            else:
                                if not check:
                                    flag = True
                            if not (self.saveRE.match(line.strip()[:5]) or
                                self.dataRE.match(line.strip()[:5])):
                                try:
                                    line = next(f1)
                                    line_num+=1
                                except StopIteration:
                                    break
                            else:
                                break
                        skipCategory = False

                    # A new loop_ or data name ends the loop being read: store
                    # its values column by column.
                    if isLoop is True and table_values_array != [] and (self.loopRE.match(line) is not None or (line.strip().startswith('_'))):
                        isLoop = False
                        num_item = len(table_names)
                        if len(table_values_array) % num_item != 0:
                            # NOTE(review): this raises MMCIFWrapperSyntaxError while
                            # the equivalent check in the data_ branch below raises
                            # mmCifSyntaxError; neither is defined in the visible
                            # code -- confirm both names exist.
                            raise MMCIFWrapperSyntaxError(category)
                        for val_index, item in enumerate(table_names):
                            data_block[category][item] = table_values_array[val_index::num_item]
                        table_values_array = []

                    # Blank lines and full-line comments carry no data.
                    if line.strip() == "":
                        continue
                    if line.startswith('#'):
                        continue
                    # Strip a trailing comment by re-joining the tokens that
                    # precede the first token starting with '#'.
                    # NOTE(review): `and` binds tighter than `or`, so the
                    # `not line.startswith(';')` guard only applies to the ' #'
                    # test, not to the '\t#' test -- confirm this is intended.
                    if '\t#' in line or ' #' in line and not line.startswith(';'):
                        new_line = ''
                        for tok in self.dataValueRE.findall(line):
                            if not tok.startswith('#'):
                                new_line += tok+" "
                            else:
                                break
                        # make sure to preserve the fact that ';' was not the first character
                        line = new_line if not new_line.startswith(';') else " "+new_line
                        # Fails for entries "3snv", "1kmm", "1ser", "2prg", "3oqd"
                        # line = re.sub(r'\s#.*$', '', line)
                    # A leading ';' opens a multi-line text field: keep appending
                    # lines until one starting with ';' has been read (or the
                    # file ends).
                    if line.startswith(';'):
                        while '\n;' not in line:
                            try:
                                line += next(f1)
                                line_num+=1
                            except StopIteration:
                                break
                        multiLineValue = True
                    if self.dataRE.match(line):
                        # New data block: flush any pending loop, store the
                        # previous block under its heading and start a fresh one.
                        if data_block != {}:
                            if table_values_array != []:
                                isLoop = False
                                num_item = len(table_names)
                                if len(table_values_array) % num_item != 0:
                                    raise mmCifSyntaxError(category)
                                for val_index, item in enumerate(table_names):
                                    data_block[category][item] = table_values_array[val_index::num_item]
                                table_names = []
                                table_values_array = []
                            mmcif_like_file[data_heading] = data_block
                            data_block = _dict()
                        data_heading = self.dataRE.match(line).group('data_heading')
                    elif self.saveRE.match(line):
                        # Save frames are skipped entirely, up to the closing
                        # 'save_' line.
                        while line.strip() != 'save_':
                            try:
                                line = next(f1)
                                line_num+=1
                            except StopIteration:
                                break
                        continue
                    elif self.loopRE.match(line):
                        # Save and clear the table_values_array buffer from the
                        # previous loop that was read
                        # NOTE(review): unlike the other flushes, which slice the
                        # flat list with [val_index::num_item], this one indexes
                        # into each element of table_values_array -- confirm
                        # this is intended.
                        if table_values_array != []:
                            for itemIndex, name in enumerate(table_names):
                                data_block[category].update({name:[row[itemIndex] for row in table_values_array]})
                            table_values_array = []
                        isLoop = True
                        category, item, value = None, None, None
                        #Stores items of a category listed in loop blocks
                        table_names = []
                        #Stores values of items in a loop as a single row
                        table_values = []
                    elif self.dataNameRE.match(line):
                        # Match category and item simultaneously
                        m = self.dataNameRE.match(line)
                        category = m.group('data_category')
                        item = m.group('category_item')
                        remainder = m.group('remainder')
                        value = None
                        if isLoop and remainder != '':
                            """Append any data values following the last loop
                            category.item tag should any exist"""
                            table_values += self._tokenizeData(remainder)
                            line = ''
                        else:
                            line = remainder + "\n"
                        if not isLoop:
                            # Plain key/value item: the value is on this line, on
                            # the next line, or in a ';'-delimited text field.
                            if line.strip() != '':
                                value = self._tokenizeData(line)
                            else:
                                # For cases where values are on the following
                                # line
                                try:
                                    line = next(f1)
                                    line_num +=1
                                except StopIteration:
                                    break
                            # value is assigned inside the body, so this loop
                            # runs at most once.
                            while value is None:
                                char_start = 1 if line.startswith(';') else 0
                                while line.startswith(';') and not line.rstrip().endswith('\n;'):
                                    try:
                                        line += next(f1)
                                        line_num+=1
                                    except StopIteration:
                                        break
                                # NOTE(review): this assignment is overwritten by
                                # both branches of the if/else that follows.
                                value = (line[char_start:line.rfind('\n;')]).strip()
                                if char_start > 0:
                                    # Text field: keep the raw text as one string.
                                    value = (line[char_start:line.rfind('\n;')]).strip()
                                else:
                                    value = self._tokenizeData(" "+line)
                            if (ignoreCategories and category in ignoreCategories) or (onlyCategories and category not in onlyCategories):
                                pass
                            else:
                                # NOTE(review): value[0] is evaluated whenever
                                # len(value) <= 1, including an empty value --
                                # confirm an empty value cannot reach this point.
                                if category in data_block:
                                    data_block[category].update({item: value if len(value) > 1 else value[0]})
                                else:
                                    data_block.setdefault(category, _dict({item: value if len(value) > 1 else value[0]})) # OrderedDict here preserves item order
                        else:
                            # Data name inside a loop header: either mark the
                            # whole category to be skipped or register the column.
                            if (ignoreCategories and category in ignoreCategories) or (onlyCategories and category not in onlyCategories):
                                skipCategory = True
                            else:
                                data_block.setdefault(category, _dict()) # OrderedDict here preserves item order
                                table_names.append(item)
                    else:
                        # Any other line is treated as a row (or partial row) of
                        # values.
                        if multiLineValue is True:
                            # The text field is one value; anything after its
                            # closing ';' is tokenised normally.
                            table_values.append((line[1:line.rfind('\n;')]).strip())
                            multiLineValue = False
                            line = line[line.rfind('\n;') + 2:]
                            if line.strip() != '':
                                table_values += self._tokenizeData(line)
                        else:
                            table_values += self._tokenizeData(line)

                        if table_values != []:
                            table_values_array += table_values
                            table_values = []
                # End of file: flush a loop that is still open. Unlike the
                # flushes above, no value-count check is made here.
                if isLoop is True and table_values_array != []:
                    isLoop = False
                    num_item = len(table_names)
                    for val_index, item in enumerate(table_names):
                        data_block[category][item] = table_values_array[val_index::num_item]
                    table_values_array = []
                if data_block != {}:
                    mmcif_like_file[data_heading] = data_block
            return mmcif_like_file
        # Both handlers only print; the method then returns None implicitly.
        except KeyError as key_err:
            print("KeyError [line %i]: %s" %(line_num, str(key_err)))
        except IOError as io_err:
            print("IOException [line %i]: %s" % (line_num, str(io_err)))