def testEditAttributes(self):
    """Test case - list the attributes of a category and append a new one"""
    try:
        category = DataCategory("A", self.__attributeList, self.__rowListAsciiA)
        # A freshly built category is expected to report row index 0 and no
        # current attribute.
        self.assertEqual(0, category.getRowIndex())
        self.assertEqual(None, category.getCurrentAttribute())

        # The ordered attribute listing must have one entry per attribute.
        attributeCount = len(category.getAttributeList())
        orderedAttributes = category.getAttributeListWithOrder()
        self.assertEqual(len(orderedAttributes), attributeCount)

        # appendAttribute() is expected to return the new attribute count and the
        # added column should read back as "?" in an existing row.
        attributeCount = len(category.getAttributeList())
        self.assertEqual(category.appendAttribute("ColNew"), attributeCount + 1)
        firstRow = category.getFullRow(0)
        self.assertEqual(firstRow[attributeCount], "?")

        # A row index past the end is expected to come back filled with "?".
        paddedRow = category.getFullRow(category.getRowCount() + 1)
        for cell in paddedRow:
            self.assertEqual(cell, "?")
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_cond_select_values(self, category_data):
    """Check index selection and conditional value selection on the 'Miss' fixture data."""
    category = DataCategory(
        'A',
        category_data['attributeListMiss'],
        category_data['rowListUnicodeMiss'],
    )
    attributeNames = category.getAttributeList()

    # Every reference value is expected to select all rows for its attribute;
    # the first attribute is skipped (reference value i pairs with attribute i + 1).
    for position, attributeName in enumerate(attributeNames[1:]):
        referenceValue = category_data['testRowUnicodeMiss'][position]
        matched = category.selectIndices(referenceValue, attributeName)
        assert len(matched) == category.getRowCount()

    # For each group of 1 to 4 attribute names produced by window(), select the
    # values where the last attribute equals the last reference value.
    for windowSize in (1, 2, 3, 4):
        for attributeWindow in window(attributeNames, size=windowSize):
            selected = category.selectValueListWhere(
                attributeWindow,
                category_data['testRowUnicodeMiss'][-1],
                category_data['attributeListMiss'][-1],
            )
            assert len(selected) == category.getRowCount()
def test_compare_values(self, category_data):
    """Check attribute-by-attribute value comparison between categories.

    Three comparisons are made against the unicode reference category:
    with itself (every attribute must compare equal), with the 'Miss'
    fixture category (only colC and colD may differ) and with a copy in
    which a single colD value has been replaced (only colD may differ).
    """
    dcU = DataCategory('A', category_data['attributeList'], category_data['rowListUnicode'])
    dcM = DataCategory('A', category_data['attributeListMiss'], category_data['rowListUnicodeMiss'])
    assert len(dcU.getAttributeList()) >= 1

    # Each comparison result is indexed as (attribute name, comparison flag).
    for tup in dcU.cmpAttributeValues(dcU):
        expected = True
        assert tup[1] == expected

    for tup in dcU.cmpAttributeValues(dcM):
        expected = tup[0] not in ['colC', 'colD']
        assert tup[1] == expected

    dcX = DataCategory('A', category_data['attributeList'], category_data['rowListUnicode'])
    # The replacement value is written with the real code points; the previous
    # literal was a mis-decoded (mojibake) copy of this same string.
    assert dcX.setValue(
        u'134ĆćĈĉĊċČčĎďĐđĒēĠġĢģĤĥĦħĨxyz',
        attributeName='colD',
        rowIndex=dcX.getRowCount() - 2)

    for tup in dcU.cmpAttributeValues(dcX):
        expected = tup[0] not in ['colD']
        assert tup[1] == expected
def test_edit_extend(self, category_data):
    """Check that appendAttributeExtendRows() adds a column readable from the last row."""
    category = DataCategory(
        'A',
        category_data['attributeList'],
        category_data['rowListAsciiA'],
    )
    attributeCount = len(category.getAttributeList())

    # The call is expected to return the attribute count after the addition.
    newCount = category.appendAttributeExtendRows('colNew')
    assert newCount == attributeCount + 1

    # The new column of the last row should hold the "?" placeholder.
    lastRow = category.getRow(category.getRowCount() - 1)
    assert lastRow[attributeCount] == "?"
def testCompareValues(self):
    """Test case - attribute-by-attribute comparison of category values"""
    try:
        reference = DataCategory("A", self.__attributeList, self.__rowListUnicode)
        withMissing = DataCategory("A", self.__attributeListMiss, self.__rowListUnicodeMiss)
        attributeNames = reference.getAttributeList()
        self.assertGreaterEqual(len(attributeNames), 1)

        # Compared with itself every attribute must be flagged as equal.
        for result in reference.cmpAttributeValues(reference):
            self.assertEqual(result[1], True)

        # Against the 'Miss' data only colC and colD are expected to differ.
        for result in reference.cmpAttributeValues(withMissing):
            expected = result[0] not in ["colC", "colD"]
            self.assertEqual(result[1], expected)

        # Replace one colD value in a fresh copy; only colD should then differ.
        edited = DataCategory("A", self.__attributeList, self.__rowListUnicode)
        targetRow = edited.getRowCount() - 2
        self.assertTrue(
            edited.setValue(u"134ĆćĈĉĊċČčĎďĐđĒēĠġĢģĤĥĦħĨxyz", attributeName="colD", rowIndex=targetRow))
        for result in reference.cmpAttributeValues(edited):
            expected = result[0] not in ["colD"]
            self.assertEqual(result[1], expected)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testGetValues(self):
    """Test case - read values back through getValue() and getValueOrDefault()"""
    try:
        category = DataCategory("A", self.__attributeList, self.__rowListUnicode)
        attributeNames = category.getAttributeList()
        logger.debug("Row length %r", category.getRowCount())

        # Reference value ii belongs to attribute ii + 1 (the first attribute is
        # skipped) and is expected in every row.
        for ii, expected in enumerate(self.__testRowUnicode):
            attributeName = attributeNames[ii + 1]
            for rowIndex in range(category.getRowCount()):
                logger.debug("ii %d j %d at %s val %r ", ii, rowIndex, attributeName, expected)
                self.assertEqual(category.getValue(attributeName, rowIndex), expected)
                self.assertEqual(category.getValueOrDefault(attributeName, rowIndex, "mydefault"), expected)

        # Negative row indices follow the usual Python convention.
        self.assertEqual(category.getValueOrDefault("colOrd", -1, "default"), 9)

        # Out-of-range rows and unknown attribute names must raise.
        pastEnd = category.getRowCount() + 1
        with self.assertRaises(IndexError):
            category.getValue("colOrd", pastEnd)
        with self.assertRaises(ValueError):
            category.getValue("badAtt", 0)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_edit_attributes(self, category_data):
    """Check attribute listing and the effect of appending a new attribute."""
    category = DataCategory(
        'A',
        category_data['attributeList'],
        category_data['rowListAsciiA'],
    )
    # A freshly built category is expected to report row index 0 and no
    # current attribute.
    assert 0 == category.getRowIndex()
    assert None == category.getCurrentAttribute()

    # The ordered attribute listing must have one entry per attribute.
    attributeCount = len(category.getAttributeList())
    orderedAttributes = category.getAttributeListWithOrder()
    assert len(orderedAttributes) == attributeCount

    # appendAttribute() is expected to return the new attribute count and the
    # added column should read back as "?" in an existing row.
    attributeCount = len(category.getAttributeList())
    assert category.appendAttribute("ColNew") == attributeCount + 1
    firstRow = category.getFullRow(0)
    assert firstRow[attributeCount] == "?"

    # A row index past the end is expected to come back filled with "?".
    paddedRow = category.getFullRow(category.getRowCount() + 1)
    for cell in paddedRow:
        assert cell == "?"
def testEditExtend(self):
    """Test case - append an attribute and extend the existing rows"""
    try:
        category = DataCategory("A", self.__attributeList, self.__rowListAsciiA)
        attributeCount = len(category.getAttributeList())

        # The call is expected to return the attribute count after the addition.
        newCount = category.appendAttributeExtendRows("colNew")
        self.assertEqual(newCount, attributeCount + 1)

        # The new column of the last row should hold the "?" placeholder.
        lastRow = category.getRow(category.getRowCount() - 1)
        self.assertEqual(lastRow[attributeCount], "?")
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_compare_attributes(self, category_data):
    """Check the three groups returned by cmpAttributeNames() for two comparisons."""
    reference = DataCategory(
        'A',
        category_data['attributeList'],
        category_data['rowListUnicode'],
    )
    withMissing = DataCategory(
        'A',
        category_data['attributeListMiss'],
        category_data['rowListUnicodeMiss'],
    )
    attributeCount = len(reference.getAttributeList())

    # Compared with itself: first and third groups empty, second holds every attribute.
    groupA, groupB, groupC = reference.cmpAttributeNames(reference)
    assert len(groupA) == 0
    assert len(groupC) == 0
    assert len(groupB) == attributeCount

    # Compared with the 'Miss' category: the third group is expected to hold 3 names.
    groupA, groupB, groupC = reference.cmpAttributeNames(withMissing)
    assert len(groupA) == 0
    assert len(groupC) == 3
    assert len(groupB) == attributeCount
def test_get_values(self, category_data):
    """Check value retrieval through getValue() and getValueOrDefault()."""
    category = DataCategory(
        'A',
        category_data['attributeList'],
        category_data['rowListUnicode'],
    )
    attributeNames = category.getAttributeList()
    print("Row length %r " % category.getRowCount())

    # Reference value ii belongs to attribute ii + 1 (the first attribute is
    # skipped) and is expected in every row.
    for ii, expected in enumerate(category_data['testRowUnicode']):
        attributeName = attributeNames[ii + 1]
        for rowIndex in range(category.getRowCount()):
            print("ii %d j %d at %s val %r " % (ii, rowIndex, attributeName, expected))
            assert category.getValue(attributeName, rowIndex) == expected
            assert category.getValueOrDefault(attributeName, rowIndex, 'mydefault') == expected

    # Negative row indices follow the usual Python convention.
    assert category.getValueOrDefault('colOrd', -1, 'default') == 9

    # Out-of-range rows and unknown attribute names must raise.
    with pytest.raises(IndexError):
        category.getValueOrDefault('colOrd', category.getRowCount() + 1, 0)
    with pytest.raises(ValueError):
        category.getValueOrDefault('badAtt', 0, 0)
def testCompareAttributes(self):
    """Test case - compare the attribute names of two categories"""
    try:
        reference = DataCategory("A", self.__attributeList, self.__rowListUnicode)
        withMissing = DataCategory("A", self.__attributeListMiss, self.__rowListUnicodeMiss)
        attributeCount = len(reference.getAttributeList())

        # Compared with itself: first and third groups empty, second holds every attribute.
        groupA, groupB, groupC = reference.cmpAttributeNames(reference)
        self.assertEqual(len(groupA), 0)
        self.assertEqual(len(groupC), 0)
        self.assertEqual(len(groupB), attributeCount)

        # Compared with the 'Miss' category: the third group is expected to hold 3 names.
        groupA, groupB, groupC = reference.cmpAttributeNames(withMissing)
        self.assertEqual(len(groupA), 0)
        self.assertEqual(len(groupC), 3)
        self.assertEqual(len(groupB), attributeCount)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testCondSelectValues(self):
    """Test case - index selection and conditional value selection"""
    try:
        category = DataCategory("A", self.__attributeListMiss, self.__rowListUnicodeMiss)
        attributeNames = category.getAttributeList()

        # Every reference value is expected to select all rows for its attribute;
        # the first attribute is skipped (reference value i pairs with attribute i + 1).
        for position, attributeName in enumerate(attributeNames[1:]):
            matched = category.selectIndices(self.__testRowUnicodeMiss[position], attributeName)
            self.assertEqual(len(matched), category.getRowCount())

        # For each group of 1 to 4 attribute names produced by window(), select the
        # values where the last attribute equals the last reference value.
        for windowSize in (1, 2, 3, 4):
            for attributeWindow in window(attributeNames, size=windowSize):
                selected = category.selectValueListWhere(
                    attributeWindow,
                    self.__testRowUnicodeMiss[-1],
                    self.__attributeListMiss[-1],
                )
                self.assertEqual(len(selected), category.getRowCount())
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def process_entry(file_in, file_out):
    """Add centre-of-mass items to the ``struct`` category of an mmCIF file.

    The input file is read twice: once with Gemmi, whose first block is handed
    to ``get_center_of_mass()``, and once with ``IoAdapterCore`` to obtain the
    containers that are modified and written out.  Only the first container
    is updated.  If it has no ``struct`` category, one is created holding an
    ``entry_id`` taken from ``entry.id`` (or ``"XXXX"`` when that is not
    available).

    Args:
        file_in: path of the mmCIF file to read.
        file_out: path of the mmCIF file to write.

    Returns:
        int: 0 on success, 1 when reading, the centre of mass calculation or
        writing fails.
    """
    try:
        cif_file = gemmi.cif.read(file_in)  # pylint: disable=no-member
        data_block = cif_file[0]
    except Exception as e:
        logger.error("Failed to read cif file in Gemmi")
        logger.error(e)
        return 1

    # Use the module logger throughout (not the root logger) so that all
    # messages of this function obey the same logging configuration.
    logger.info("Finding Centre of Mass")
    com = get_center_of_mass(data_block)
    if not com:
        return 1

    try:
        io = IoAdapterCore()
        ccL = io.readFile(file_in)
    except Exception as e:
        logger.error("Failed to read cif file using IoAdapterCore %s", e)
        return 1

    # Truthiness also covers a None result, which len() would choke on.
    if not ccL:
        logger.error("No data parsed from file")
        return 1

    # First block only
    b0 = ccL[0]
    obj = b0.getObj("struct")
    if obj is None:
        # The category does not exist yet: create it, keyed by the entry id.
        eid = "XXXX"
        eobj = b0.getObj("entry")
        if eobj and "id" in eobj.getAttributeList():
            eid = eobj.getValue("id", 0)
        obj = DataCategory("struct")
        obj.appendAttribute("entry_id")
        obj.setValue(eid, "entry_id", 0)
        b0.append(obj)

    newdata = (
        ("pdbx_center_of_mass_x", com.x),
        ("pdbx_center_of_mass_y", com.y),
        ("pdbx_center_of_mass_z", com.z),
    )
    for it, val in newdata:
        # Add the attribute only when missing so existing values are overwritten in place.
        if it not in obj.getAttributeList():
            obj.appendAttribute(it)
        obj.setValue(str(val), it, 0)

    try:
        logger.info("Writing mmcif file: %s", file_out)
        ret = io.writeFile(file_out, ccL)
        if not ret:
            # A falsy return is a failure, so report it at error level.
            logger.error("Writing failed error %s", ret)
            return 1
    except Exception as e:
        logger.error("Failed to write ccif file in IoAdapater %s", e)
        return 1

    return 0
def __parser(self, tokenizer, containerList, categorySelectionD=None, excludeFlag=False):
    """Parser for PDBx data files and dictionaries.

    Tokens are pulled from the tokenizer one at a time and dispatched on a
    parser state derived either from a ``_category.attribute`` name
    (key-value pair) or from a reserved word via ``self.__getState()``.

    Args:
        tokenizer: iterator advanced with ``next()``; each item is unpacked as
            (category name, attribute name, quoted string, unquoted word).
            It recognizes data item names (_category.attribute), quoted
            strings (single, double and multi-line semi-colon delimited) and
            unquoted strings.
        containerList: list-type container for the data and definition
            objects parsed from the input file; new containers are appended
            to it in place.
        categorySelectionD: optional collection of category names.  When it is
            non-empty, a category is only added to its container if it is
            selected (excludeFlag False) or not selected (excludeFlag True).
            Skipped categories are still read from the token stream.
        excludeFlag: if True the selection lists categories to leave out
            instead of categories to keep.

    Returns:
        None.  Parsing stops early when excludeFlag is False and
        ``self.__allSelected()`` reports that the selection is complete, when
        a ``stop`` reserved word directly follows a loop_ declaration, or
        after a reported syntax error.

    NOTE(review): StopIteration raised by ``next(tokenizer)`` at the end of
    the input is not handled here - presumably the caller catches it.  Some
    ``self.__syntaxError()`` calls are not followed by a return, which
    suggests that method raises - confirm.
    """
    catSelectD = categorySelectionD if categorySelectionD is not None else {}
    logger.debug("Exclude Flag %r Category selection %r", excludeFlag, catSelectD)
    # Working container - data or definition
    curContainer = None
    # the last container of type data -
    previousDataContainer = None
    #
    # Working category container
    categoryIndex = {}
    curCategory = None
    #
    curRow = None
    state = None

    # Find the first reserved word and begin capturing data.
    #
    while True:
        curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
        if curWord is None:
            continue
        reservedWord, state = self.__getState(curWord)
        if reservedWord is not None:
            break

    while True:
        #
        #  Set the current state  -
        #
        #  At this point in the processing cycle we are expecting a token containing
        #  either a '_category.attribute'  or a reserved word.
        #
        if curCatName is not None:
            state = "ST_KEY_VALUE_PAIR"
        elif curWord is not None:
            reservedWord, state = self.__getState(curWord)
        else:
            self.__syntaxError("Miscellaneous syntax error")
            return

        #
        # Process  _category.attribute  value assignments
        #
        if state == "ST_KEY_VALUE_PAIR":
            try:
                curCategory = categoryIndex[curCatName]
            except KeyError:
                # A new category is encountered - create a container and add a row
                curCategory = categoryIndex[curCatName] = DataCategory(curCatName)
                #
                #  check if we have all of the selection
                if not excludeFlag and self.__allSelected(curContainer, catSelectD):
                    return
                try:
                    if catSelectD:
                        if not excludeFlag and curCatName in catSelectD:
                            curContainer.append(curCategory)
                        elif excludeFlag and curCatName not in catSelectD:
                            curContainer.append(curCategory)
                        else:
                            logger.debug("Skipped unselected/excluded category %s", curCatName)
                    else:
                        curContainer.append(curCategory)
                except AttributeError:
                    # curContainer is still None: no data_ block has been opened yet.
                    self.__syntaxError("Category cannot be added to data_ block")
                    return

                curRow = []
                curCategory.append(curRow)
            else:
                # Recover the existing row from the category
                try:
                    curRow = curCategory.getRow(0)
                except IndexError:
                    self.__syntaxError("Internal index error accessing category data")
                    return

            # Check for duplicate attributes and add attribute to table.
            if curAttName in curCategory.getAttributeList():
                self.__syntaxError("Duplicate attribute encountered in category")
                return
            else:
                curCategory.appendAttribute(curAttName)

            # Get the data for this attribute from the next token
            tCat, _, curQuotedString, curWord = next(tokenizer)

            if tCat is not None or (curQuotedString is None and curWord is None):
                self.__syntaxError("Missing data for item _%s.%s" % (curCatName, curAttName))

            if curWord is not None:
                #
                # Validation check token for misplaced reserved words  -
                #
                reservedWord, state = self.__getState(curWord)
                if reservedWord is not None:
                    self.__syntaxError("Unexpected reserved word: %s" % (reservedWord))

                curRow.append(curWord)

            elif curQuotedString is not None:
                curRow.append(curQuotedString)

            else:
                self.__syntaxError("Missing value in item-value pair")

            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
            continue

        #
        # Process a loop_ declaration and associated data -
        #
        elif state == "ST_TABLE":

            # The category name in the next curCatName,curAttName pair
            #    defines the name of the category container.
            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

            if curCatName is None or curAttName is None:
                self.__syntaxError("Unexpected token in loop_ declaration")
                return

            # Check for a previous category declaration.
            if curCatName in categoryIndex:
                self.__syntaxError("Duplicate category declaration in loop_")
                return

            curCategory = DataCategory(curCatName)
            #
            #  check if we have all of the selection
            if not excludeFlag and self.__allSelected(curContainer, catSelectD):
                return
            try:
                if catSelectD:
                    if not excludeFlag and curCatName in catSelectD:
                        curContainer.append(curCategory)
                    elif excludeFlag and curCatName not in catSelectD:
                        curContainer.append(curCategory)
                    else:
                        logger.debug("Skipped unselected/excluded category %s", curCatName)
                else:
                    curContainer.append(curCategory)
            except AttributeError:
                # curContainer is still None: no container has been opened yet.
                self.__syntaxError("loop_ declaration outside of data_ block or save_ frame")
                return

            curCategory.appendAttribute(curAttName)

            # Read the rest of the loop_ declaration
            while True:
                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

                if curCatName is None:
                    break

                if curCatName != curCategory.getName():
                    self.__syntaxError("Changed category name in loop_ declaration")
                    return

                curCategory.appendAttribute(curAttName)

            # If the next token is a 'word', check it for any reserved words -
            if curWord is not None:
                reservedWord, state = self.__getState(curWord)
                if reservedWord is not None:
                    if reservedWord == "stop":
                        return
                    else:
                        self.__syntaxError("Unexpected reserved word after loop declaration: %s" % (reservedWord))

            # Read the table of data for this loop_ -
            while True:
                curRow = []
                curCategory.append(curRow)

                # One token is consumed per declared attribute to fill the row.
                for _ in curCategory.getAttributeList():
                    if curWord is not None:
                        curRow.append(curWord)
                    elif curQuotedString is not None:
                        curRow.append(curQuotedString)

                    curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

                # loop_ data processing ends if -

                # A new _category.attribute is encountered
                if curCatName is not None:
                    break

                # A reserved word is encountered
                if curWord is not None:
                    reservedWord, state = self.__getState(curWord)
                    if reservedWord is not None:
                        break

            continue

        elif state == "ST_DEFINITION":
            # Ignore trailing unnamed saveframe delimiters e.g. 'save'
            sName = self.__getContainerName(curWord)
            if sName:
                curContainer = DefinitionContainer(sName)
                containerList.append(curContainer)
                categoryIndex = {}
                curCategory = None
            else:
                # reset current container to the last data container
                curContainer = previousDataContainer

            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

        elif state == "ST_DATA_CONTAINER":
            #
            dName = self.__getContainerName(curWord)
            if not dName:
                dName = "unidentified"
            curContainer = DataContainer(dName)
            containerList.append(curContainer)
            categoryIndex = {}
            curCategory = None
            previousDataContainer = curContainer
            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

        elif state == "ST_STOP":
            # Consume the stop token before looping.  The state is re-derived from
            # curCatName/curWord at the top of the loop, so continuing without
            # advancing would select ST_STOP again and never terminate.
            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
            continue

        elif state == "ST_GLOBAL":
            curContainer = DataContainer("blank-global")
            curContainer.setGlobal()
            containerList.append(curContainer)
            categoryIndex = {}
            curCategory = None
            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

        elif state == "ST_UNKNOWN":
            self.__syntaxError("Unrecogized syntax element: " + str(curWord))
            return