def build_translation_table(filename):
    """Build the operator translation table for the CIF file at ``filename``.

    The file is opened by path and parsed with ``PdbxReader``; only the first
    parsed data block is consulted. Every result is a mapping keyed by that
    block's name (``getName()``) and is intended for use by ``translate``.

    Three result shapes are possible:

    * The block has no ``pdbx_struct_assembly_gen`` category: a nested
      ``defaultdict`` that yields ``'1_555'`` for any assembly/model lookup.
    * A row's ``oper_expression`` contains ``'('``: a ``defaultdict`` whose
      every assembly resolves to one shared ``DefaultUsingKey`` built around
      ``'P_%s' % key``. Anything collected from earlier rows is discarded.
    * Otherwise: ``{name: {assembly_id: {operator_id: operator_name}}}``,
      with operator names taken from ``pdbx_struct_oper_list``.

    Raises:
        MissingOperatorTableError: the assembly_gen category is present but
            ``table(..., 'pdbx_struct_oper_list')`` returned something falsy.
    """
    blocks = []
    with open(filename, 'r') as handle:
        PdbxReader(handle).read(blocks)

    entry = blocks[0]
    entry_name = entry.getName()

    # Without an assembly_gen category the whole AU lives in this one file.
    # There are no biological assemblies and the number of models is unknown,
    # so every lookup is answered with '1_555'.
    if not entry.getObj('pdbx_struct_assembly_gen'):
        return {
            entry_name: defaultdict(lambda: defaultdict(lambda: '1_555')),
        }

    operators = table(entry, 'pdbx_struct_oper_list')
    if not operators:
        raise MissingOperatorTableError(filename)

    assemblies = {}
    for row in rows(entry, 'pdbx_struct_assembly_gen'):
        per_assembly = assemblies.setdefault(row['assembly_id'], {})
        expression = row['oper_expression']

        # NOTE(review): the original author flagged an assumption here that
        # the AU is always 1_555 -- confirm before relying on it.
        # A parenthesised expression is not expanded; the function bails out
        # with a key-driven default table for the whole file instead.
        if '(' in expression:
            shared_models = DefaultUsingKey(lambda k: 'P_%s' % k)
            return {entry_name: defaultdict(lambda: shared_models)}

        for operator_id in expression.split(','):
            per_assembly[operator_id] = operators[operator_id]['name']

    return {entry_name: assemblies}