def __init__(self, file_path=None, compress=False, mode='w', preserve_order=False):
    """Create a CIF file writer.

    :param file_path: Output path (str), or None to defer opening a handle
        (a path can then be supplied when the CIF object is written).
    :param compress: If True, gzip the output; ".gz" is appended to
        file_path when not already present.
    :param mode: File mode; 'w' is promoted to 'wb' when compressing,
        since gzip streams are binary.
    :param preserve_order: Keep mmCIF token order when writing.
    :raises TypeError: If file_path is neither a non-empty string nor None.
    """
    self.compress = compress
    self.mode = 'wb' if (compress and mode == 'w') else mode
    self.preserve_token_order = preserve_order

    # Fix: the original did `from exceptions import TypeError`, which only
    # exists on Python 2 and raises ImportError on Python 3. TypeError is a
    # builtin, so raise it directly.
    if not (file_path is None or (isinstance(file_path, str) and file_path)):
        raise TypeError("file_path argument is not a string")

    # Ensure a ".gz" suffix when compression was requested.
    if file_path is not None and compress and not file_path.endswith(".gz"):
        file_path += ".gz"

    # NB: as in the original, the handle is opened with the caller-supplied
    # mode (not self.mode); openGzip selects gzip vs. plain by extension.
    self._handle = openGzip(file_path, mode) if file_path is not None else None
    self.verbose = False  # TODO: Not implemented
def _exportCifFile(self, file_path, token_ordering):
    """Build a CifFile object by driving the Keller StarTokeniser over the
    file at *file_path*.

    :param file_path: Path of the (optionally gzipped) mmCIF file, or None.
    :param token_ordering: Forwarded as preserve_token_order to CifFile.
    :return: A populated CifFile, or None when file_path is None.
    :raises BadStarTokenError: On the first BAD_TOKEN from the tokeniser.
    """
    cf = None
    if file_path is not None:
        cif_file = openGzip(file_path, 'r')
        tokeniser = StarTokeniser()
        tokeniser.start_matching(cif_file)
        if cif_file:
            cf = CifFile(file_path, preserve_token_order=token_ordering)
            # Current containers while walking the token stream.
            db = None  # current data block
            sf = None  # current save frame
            cc = None  # current category
            ci = None  # current item
            loopItems = []   # item names of the loop being accumulated
            loopValues = []  # (value, type_string) pairs for that loop
            loop_state = False        # inside a loop_ construct
            save_state = False        # inside a save frame
            loop_value_state = False  # loop values have started arriving
            # Keller tokenizer provides the following tokens:
            # "", "MULTILINE", "COMMENT", "GLOBAL", "SAVE_FRAME", "SAVE_FRAME_REF",
            # "LOOP_STOP", "DATA_BLOCK", "LOOP", "BAD_CONSTRUCT", "DATA_NAME", "SQUOTE_STRING",
            # "DQUOTE_STRING", "NULL", "UNKNOWN", "SQUARE_BRACKET", "STRING", "BAD_TOKEN"
            DATA_TOKENS = ["MULTILINE", "SQUOTE_STRING", "DQUOTE_STRING",
                           "NULL", "UNKNOWN", "STRING"]
            # NB: Square bracket types are not currently handled
            for tok in tokeniser:
                if tok.type_string == 'BAD_TOKEN':
                    raise BadStarTokenError(tok)
                if tok.type_string == 'DATA_BLOCK':
                    # 'data_XXX' -> block id 'XXX'
                    db = cf.setDataBlock(tok.value[tok.value.find('_')+1:])
                    loop_state = False
                    save_state = False
                elif tok.type_string == 'LOOP':
                    loop_value_state = False
                    if not loop_state:
                        loop_state = True
                    # Flush values buffered for a previous loop, if any.
                    if loopValues != []:
                        self._processLoop(cc, loopItems, loopValues)
                        loopItems = []
                        loopValues = []
                elif tok.type_string == 'SAVE_FRAME':
                    if save_state:
                        # A second 'save_' token closes the current frame.
                        save_state = False
                    else:
                        sf = db.setSaveFrame(tok.value[tok.value.find('_')+1:])
                        save_state = True
                    if loop_state:
                        loop_state = False
                        if loopValues != []:
                            self._processLoop(cc, loopItems, loopValues)
                            loopItems = []
                            loopValues = []
                elif tok.type_string == 'DATA_NAME':
                    # Token is of the form '<category>.<item>'.
                    [category_name, item_name] = tok.value.split('.')
                    if loop_value_state:
                        # A data name arriving after loop values ends the loop.
                        loop_state = False
                        loop_value_state = False
                        if loopValues != []:
                            self._processLoop(cc, loopItems, loopValues)
                            loopItems = []
                            loopValues = []
                    if not save_state:
                        cc = db.setCategory(category_name)
                    else:
                        cc = sf.setCategory(category_name)
                    if loop_state:
                        loopItems.append(item_name)
                    ci = cc.setItem(item_name)
                elif tok.type_string in DATA_TOKENS:
                    # It's a data containing token
                    token_value = tok.value
                    if loop_state:
                        loopValues.append((token_value, tok.type_string))
                        if not loop_value_state:
                            loop_value_state = True
                    else:
                        ci.setValue(token_value, tok.type_string)
            # Flush a loop still open at end of token stream.
            if loopValues != []:
                self._processLoop(cc, loopItems, loopValues)
                loopItems = []
                loopValues = []
    return cf
def _writeCifObj(self, cifObjIn, compress=False, mode='w'):
    """Serialise *cifObjIn* (a CifFile-like object) to the writer's handle.

    Opens the handle lazily from cifObjIn.file_path when none was given at
    construction time. Writes every data block, its categories and its
    save frames, then flushes.

    :param cifObjIn: CIF object exposing getDataBlocks()/file_path.
    :param compress: gzip the output when the handle is opened here.
    :param mode: file mode used when the handle is opened here.
    """
    if self._handle is None:
        try:
            if compress:
                self._handle = gzip.open(cifObjIn.file_path + ".gz", mode)
            else:
                self._handle = openGzip(cifObjIn.file_path, mode)
        except Exception as err:
            # Best-effort: report and give up rather than propagate,
            # matching the established error-handling style of this writer.
            print("CifFileWriter error: %s" % str(err))
            print("Could not write mmCIF file (No output path/filename specified)")
            return
    for datablock in cifObjIn.getDataBlocks():
        self._handle.write(self.DATABLOCK % datablock.getId())
        self._writeCategories(datablock)
        # HANDLE SAVEFRAMES
        for saveframe in datablock.getSaveFrames():
            self._handle.write(self.SAVEFRAMESTART % saveframe.getId())
            self._writeCategories(saveframe)
            self._handle.write(self.SAVEFRAMEEND)
    self._handle.flush()

def _writeCategories(self, container):
    """Write all categories of *container* (a data block or save frame).

    Non-table categories are written as aligned tag/value pairs; table
    categories as a loop_ header followed by a pretty-printed value table.
    (Extracted helper: this code was previously duplicated verbatim for
    data blocks and save frames.)
    """
    for category in container.getCategories():
        if not category.isTable:
            for item in category.getItems():
                tag = (self.CAT_ITM % (category.getId(), item.name))
                # Pad so all values start in the same column.
                tag = tag.ljust(category._maxTagLength + 8)
                self._handle.write(tag + item.getFormattedValue() + "\n")
        else:
            self._handle.write(self.LOOP)
            table = []
            for item in category.getItems():
                tag = (self.CAT_ITM % (category.getId(), item.name))
                tag = tag.ljust(category._maxTagLength + 8)
                self._handle.write(tag + "\n")
                table.append(item.getFormattedValue())
            # Columns were collected row-wise per item; transpose on output.
            self._handle.write(pretty_print(table, transpose=True))
            self._handle.write('\n' + self.NEWLINE)
def _parseFile(self, file_path, ignoreCategories, preserve_token_order, onlyCategories):
    """Private method that will do the work of parsing the mmCIF data file.

    Returns a (possibly ordered) dict of the form
    {data_heading: {category: {item: value-or-list}}}.

    :param file_path: Path of the (optionally gzipped) mmCIF file.
    :param ignoreCategories: Categories to skip (checked per data name).
    :param preserve_token_order: Use OrderedDict to keep file order.
    :param onlyCategories: If non-empty, parse only these categories.
    """
    if preserve_token_order:
        try:
            from collections import OrderedDict as _dict
        except ImportError:
            # fallback: try to use the ordereddict backport when using python 2.6
            try:
                from ordereddict import OrderedDict as _dict
            except ImportError:
                # backport not installed: use local OrderedDict
                from mmCif.ordereddict import OrderedDict as _dict
    else:
        _dict = dict

    mmcif_like_file = _dict()
    data_block = _dict()
    save_block = _dict()  # NOTE(review): appears unused below — confirm

    data_heading = ""
    line_num = 0  # tracked only for error reporting
    try:
        with openGzip(file_path, 'r') as f1:
            table_names = []         # item names of the current loop_
            table_values = []        # values tokenised from the current line
            table_values_array = []  # accumulated values for the whole loop_
            isLoop = False
            multiLineValue = False   # currently inside a ;-delimited value
            skipCategory = False     # fast-forward over an ignored category
            for line in f1:
                line_num += 1
                if skipCategory:
                    # Consume lines until the next tag/loop_/save_/data_
                    # after the ignored category's value block.
                    flag = False
                    while line:
                        check = (line.strip().startswith('_')
                                 or self.loopRE.match(line.strip()[:5])
                                 or self.saveRE.match(line.strip()[:5])
                                 or self.dataRE.match(line.strip()[:5]))
                        if flag:
                            if check:
                                isLoop = False
                                break
                        else:
                            if not check:
                                flag = True
                        if not (self.saveRE.match(line.strip()[:5])
                                or self.dataRE.match(line.strip()[:5])):
                            try:
                                line = next(f1)
                                line_num += 1
                            except StopIteration:
                                break
                        else:
                            break
                    skipCategory = False

                # A new tag or loop_ while loop values are pending: the loop
                # has ended, so distribute the flat value buffer column-wise.
                if isLoop is True and table_values_array != [] and (self.loopRE.match(line) is not None or (line.strip().startswith('_'))):
                    isLoop = False
                    num_item = len(table_names)
                    if len(table_values_array) % num_item != 0:
                        raise MMCIFWrapperSyntaxError(category)
                    for val_index, item in enumerate(table_names):
                        data_block[category][item] = table_values_array[val_index::num_item]
                    table_values_array = []

                if line.strip() == "":
                    continue
                if line.startswith('#'):
                    continue
                # Strip trailing comments (but not inside ;-delimited text).
                if '\t#' in line or ' #' in line and not line.startswith(';'):
                    new_line = ''
                    for tok in self.dataValueRE.findall(line):
                        if not tok.startswith('#'):
                            new_line += tok+" "
                        else:
                            break
                    # make sure to preserve the fact that ';' was not the first character
                    line = new_line if not new_line.startswith(';') else " "+new_line
                    # Fails for entries "3snv", "1kmm", "1ser", "2prg", "3oqd"
                    # line = re.sub(r'\s#.*$', '', line)
                if line.startswith(';'):
                    # Accumulate the whole ;-delimited multi-line value.
                    while '\n;' not in line:
                        try:
                            line += next(f1)
                            line_num += 1
                        except StopIteration:
                            break
                    multiLineValue = True
                if self.dataRE.match(line):
                    # New data_ heading: close off the previous block first.
                    if data_block != {}:
                        if table_values_array != []:
                            isLoop = False
                            num_item = len(table_names)
                            if len(table_values_array) % num_item != 0:
                                # NOTE(review): sibling code above raises
                                # MMCIFWrapperSyntaxError here — confirm which
                                # name is correct; left unchanged.
                                raise mmCifSyntaxError(category)
                            for val_index, item in enumerate(table_names):
                                data_block[category][item] = table_values_array[val_index::num_item]
                            table_names = []
                            table_values_array = []
                        mmcif_like_file[data_heading] = data_block
                        data_block = _dict()
                    data_heading = self.dataRE.match(line).group('data_heading')
                elif self.saveRE.match(line):
                    # Save frames are skipped entirely by this parser.
                    while line.strip() != 'save_':
                        try:
                            line = next(f1)
                            line_num += 1
                        except StopIteration:
                            break
                    continue
                elif self.loopRE.match(line):
                    # Save and clear the table_values_array buffer from the
                    # previous loop that was read
                    if table_values_array != []:
                        for itemIndex, name in enumerate(table_names):
                            data_block[category].update({name: [row[itemIndex] for row in table_values_array]})
                        table_values_array = []
                    isLoop = True
                    category, item, value = None, None, None
                    # Stores items of a category listed in loop blocks
                    table_names = []
                    # Stores values of items in a loop as a single row
                    table_values = []
                elif self.dataNameRE.match(line):
                    # Match category and item simultaneously
                    m = self.dataNameRE.match(line)
                    category = m.group('data_category')
                    item = m.group('category_item')
                    remainder = m.group('remainder')
                    value = None
                    if isLoop and remainder != '':
                        # Append any data values following the last loop
                        # category.item tag should any exist.
                        table_values += self._tokenizeData(remainder)
                        line = ''
                    else:
                        line = remainder + "\n"
                    if not isLoop:
                        if line.strip() != '':
                            value = self._tokenizeData(line)
                        else:
                            # For cases where values are on the following
                            # line
                            try:
                                line = next(f1)
                                line_num += 1
                            except StopIteration:
                                break
                        while value is None:
                            char_start = 1 if line.startswith(';') else 0
                            # Pull in the rest of a ;-delimited value.
                            while line.startswith(';') and not line.rstrip().endswith('\n;'):
                                try:
                                    line += next(f1)
                                    line_num += 1
                                except StopIteration:
                                    break
                            # NOTE(review): this assignment is immediately
                            # overwritten by both branches below — redundant
                            # but harmless; left unchanged.
                            value = (line[char_start:line.rfind('\n;')]).strip()
                            if char_start > 0:
                                value = (line[char_start:line.rfind('\n;')]).strip()
                            else:
                                value = self._tokenizeData(" "+line)
                        if (ignoreCategories and category in ignoreCategories) or (onlyCategories and category not in onlyCategories):
                            pass
                        else:
                            # Single value stays scalar; multiple values
                            # become a list.
                            if category in data_block:
                                data_block[category].update({item: value if len(value) > 1 else value[0]})
                            else:
                                data_block.setdefault(category, _dict({item: value if len(value) > 1 else value[0]}))  # OrderedDict here preserves item order
                    else:
                        if (ignoreCategories and category in ignoreCategories) or (onlyCategories and category not in onlyCategories):
                            skipCategory = True
                        else:
                            data_block.setdefault(category, _dict())  # OrderedDict here preserves item order
                            table_names.append(item)
                else:
                    # A plain value line belonging to the current loop_.
                    if multiLineValue is True:
                        table_values.append((line[1:line.rfind('\n;')]).strip())
                        multiLineValue = False
                        line = line[line.rfind('\n;') + 2:]
                        if line.strip() != '':
                            table_values += self._tokenizeData(line)
                    else:
                        table_values += self._tokenizeData(line)
                    if table_values != []:
                        table_values_array += table_values
                        table_values = []
            # Flush a loop left open at end of file.
            if isLoop is True and table_values_array != []:
                isLoop = False
                num_item = len(table_names)
                for val_index, item in enumerate(table_names):
                    data_block[category][item] = table_values_array[val_index::num_item]
                table_values_array = []
        if data_block != {}:
            mmcif_like_file[data_heading] = data_block
        return mmcif_like_file
    except KeyError as key_err:
        print("KeyError [line %i]: %s" % (line_num, str(key_err)))
    except IOError as io_err:
        print("IOException [line %i]: %s" % (line_num, str(io_err)))