def upgrade_chemcomps_and_create_entities_where_needed(entry: pynmrstar.Entry,
                                                       schema: pynmrstar.Schema) -> None:
    """ Generates an entity saveframe for each chem comp saveframe.

    Chem_comps referenced from an _Entity_assembly loop that carry a valid
    PDB_code are replaced with the canonical chem_comp/entity pair fetched
    from the BMRB database; all others get a minimal entity built by
    create_entity_for_saveframe_and_attach().  Entity_label references in the
    assembly loops are rewritten afterwards to point at the new entities.
    Mutates `entry` in place and returns None. """

    # Store a mapping of chem_comp name to new entity name
    chem_comp_entity_map = {}

    need_linking = []
    # Every saveframe referenced by an Entity_label tag; a set removes duplicates.
    linked_items = set(entry.get_tag('_Entity_assembly.Entity_label'))
    for linked_item in linked_items:
        # Remove the '$' from the beginning of the tag
        linked_saveframe = entry.get_saveframe_by_name(linked_item[1:])
        # Only chem_comp frames need an entity generated; real entities are left alone.
        if linked_saveframe.category == 'chem_comp':
            need_linking.append(linked_saveframe)
    need_linking = _sort_saveframes(list(need_linking))

    # Create the entity for the chem_comps that need linking
    for saveframe in need_linking:
        if 'PDB_code' in saveframe and saveframe['PDB_code'][0] not in pynmrstar.definitions.NULL_VALUES:
            try:
                # Pull the canonical chem_comp entry (which includes an entity) from the BMRB database.
                chemcomp_entry = pynmrstar.Entry.from_database('chemcomp_' + saveframe['PDB_code'][0].upper())
            except IOError:
                # Lookup failed - flag it for a human and fall back to a locally built entity.
                saveframe['Note_to_annotator'] = 'Attempted to automatically look up the chem_comp and entity' \
                                                ' from the PDB_code, but it isn\'t valid. Please rectify.'
                chem_comp_entity_map[saveframe.name] = create_entity_for_saveframe_and_attach(entry, saveframe,
                                                                                              schema)
                continue
            chemcomp_saveframe = chemcomp_entry.get_saveframes_by_category('chem_comp')[0]
            # Carry the author-supplied values over onto the downloaded frames.
            chemcomp_saveframe['Paramagnetic'] = saveframe['Paramagnetic'][0]
            chemcomp_saveframe['Aromatic'] = saveframe['Aromatic'][0]
            if 'details' in saveframe:
                chemcomp_saveframe['Details'] = saveframe['Details'][0]
            new_entity = chemcomp_entry.get_saveframes_by_category('entity')[0]
            new_entity['Paramagnetic'] = saveframe['Paramagnetic'][0]

            # Replace the existing saveframes with the new ones (first rename, to preserve the links)
            entry.rename_saveframe(saveframe.name, chemcomp_saveframe.name)
            entry[chemcomp_saveframe.name] = chemcomp_saveframe
            entry.add_saveframe(new_entity)
            # NOTE(review): assumes rename_saveframe() updates saveframe.name in
            # place, so this key is the NEW name (matching the rewritten
            # Entity_label tags) - confirm against pynmrstar behavior.
            chem_comp_entity_map[saveframe.name] = new_entity.name
        else:
            # No usable PDB code - build a minimal entity from the chem_comp itself.
            chem_comp_entity_map[saveframe.name] = create_entity_for_saveframe_and_attach(entry, saveframe, schema)

    # Update the entity_assembly loop in each assembly to point to the entity rather than the chem_comp
    for each_entity_assembly in entry.get_loops_by_category('_Entity_assembly'):
        entity_label_col = each_entity_assembly.tag_index('Entity_label')
        for row in each_entity_assembly.data:
            # Strip the leading '$' before looking the frame name up in the map.
            if row[entity_label_col][1:] in chem_comp_entity_map:
                row[entity_label_col] = f"${chem_comp_entity_map[row[entity_label_col][1:]]}"
def create_entity_for_saveframe_and_attach(parent_entry: pynmrstar.Entry,
                                           saveframe: pynmrstar.Saveframe,
                                           schema: pynmrstar.Schema) -> str:
    """ For a chem_comp, create an entity for it and attach it to the entry.

    The entity is a minimal non-polymer record whose _Entity_comp_index loop
    points back at the source chem_comp saveframe.  Mutates `parent_entry`.
    Returns the name of the newly attached entity saveframe. """

    # Pick the next free entity number.  default=0 guards against an entry
    # with no existing entity saveframes, where a bare max() would raise
    # ValueError on the empty sequence.
    next_entity: int = max(
        (int(x.name.split('_')[-1])
         for x in parent_entry.get_saveframes_by_category('entity')),
        default=0) + 1
    new_entity = pynmrstar.Saveframe.from_template('entity',
                                                   name='entity_%s' % next_entity,
                                                   schema=schema,
                                                   all_tags=False,
                                                   entry_id=parent_entry.entry_id)
    # Start from a clean slate - the template's loops aren't wanted here.
    new_entity.loops = []
    new_entity['Name'] = saveframe['Name'][0]
    new_entity['Paramagnetic'] = saveframe['Paramagnetic'][0]
    new_entity['Type'] = 'non-polymer'
    new_entity['Ambiguous_conformational_states'] = 'no'
    new_entity['Nstd_chirality'] = 'no'
    new_entity['Nstd_linkage'] = 'no'
    new_entity['Thiol_state'] = 'not available'
    new_entity.add_missing_tags(schema=schema)

    # Single-row comp index pointing back at the source chem_comp frame.
    comp_index_loop: pynmrstar.Loop = pynmrstar.Loop.from_scratch('_Entity_comp_index')
    comp_index_loop.add_tag(['ID', 'Comp_ID', 'Comp_label', 'Entry_ID'])
    comp_index_loop.add_data([1,
                              saveframe['ID'][0],
                              '$' + saveframe['Sf_framecode'][0],
                              parent_entry.entry_id])
    comp_index_loop.add_missing_tags(schema=schema)
    # NOTE(review): presumably add_missing_tags() above can leave an empty
    # _Entity_comp_index on the frame - drop any existing copy before
    # attaching ours; confirm against pynmrstar behavior.
    if '_Entity_comp_index' in new_entity:
        del new_entity['_Entity_comp_index']
    new_entity.add_loop(comp_index_loop)

    parent_entry.add_saveframe(new_entity)
    return new_entity.name
args = read_args()

# Refuse to run interactively with nothing to read: no file arguments and a
# terminal on stdin means no input stream is coming.
is_tty = sys.stdin.isatty()
if is_tty and not args.files:
    parser.print_help()
    print()
    exit_error("""I require at least 1 argument or input stream with a chemical_shift_list frame""")

entries = []
try:
    if not args.files:
        # Piped input: parse the whole stream as a single entry.
        lines = check_stream()
        if lines:
            entries.append(Entry.from_string(lines))
        else:
            exit_error("Error: input appears to be empty")
    else:
        # One entry per named file, in command-line order.
        for file in args.files:
            entries.append(Entry.from_file(file))
except OSError as e:
    exit_error(f"couldn't open target nef file because {e}")

# Apply the requested residue-number offset to every entry that was read.
for entry in entries:
    offset_residue_numbers(entry, args.chain, args.offset)
    # print(entry)
def data_as_nef(overall_result):
    """Render restraint-violation tables as a NEF entry string.

    `overall_result` maps a table name to a dict of {index-tuple: row-data};
    each table becomes one ccpn_distance_restraint_violation_list saveframe
    containing a single ccpn_restraint_violation loop.
    """
    entry = Entry.from_scratch('default')
    category = "ccpn_distance_restraint_violation_list"

    for table_name, table_data in overall_result.items():
        frame_code = f'{category}_{table_name}'
        frame = Saveframe.from_scratch(frame_code, category)
        entry.add_saveframe(frame)

        # NOTE(review): assumes all rows of a table share one restraint list -
        # only the first row's value is consulted.
        restraint_list = list(table_data.values())[0]['restraint-list']

        header_tags = (
            ("sf_category", category),
            ("sf_framecode", frame_code),
            ("nef_spectrum", f"nef_nmr_spectrum_{restraint_list}"),
            ("nef_restraint_list", f"nef_distance_restraint_list_{restraint_list}"),
            ("program", 'Xplor-NIH'),
            ("program_version", UNUSED),
            ("protocol", 'marvin/pasd,refine'),
            ("protocol_version", UNUSED),
            ("protocol_parameters", UNUSED),
        )
        for tag_name, tag_value in header_tags:
            frame.add_tag(tag_name, tag_value)

        violation_loop = Loop.from_scratch()
        violation_loop.set_category('ccpn_restraint_violation')
        violation_loop.add_tag(
            ('index', 'model_id', 'restraint_id', 'restraint_sub_id',
             'chain_code_1', 'sequence_code_1', 'residue_name_1', 'atom_name_1',
             'chain_code_2', 'sequence_code_2', 'residue_name_2', 'atom_name_2',
             'weight', 'probability', 'lower_limit', 'upper_limit',
             'distance', 'violation', 'violation_file', 'structure_file',
             'structure_index', 'nef_peak_id', 'comment'))

        for row_number, (row_key, line_data) in enumerate(table_data.items()):
            id_fields = [row_number, *row_key]
            # Convert 0-based index, restraint_id and restraint_sub_id to 1-based.
            id_fields[0] += 1
            id_fields[2] += 1
            id_fields[3] += 1

            # TODO: conversion of SEGID to chain ID maybe too crude
            first_selection = line_data['selection-1']
            first_selection[0] = first_selection[0].strip()
            second_selection = line_data['selection-2']
            second_selection[0] = second_selection[0].strip()

            violation_loop.add_data([
                *id_fields,
                *first_selection,
                *second_selection,
                1.0,
                line_data['probability'],
                line_data['min'],
                line_data['max'],
                # GST: this removes trailing rounding errors without loss of accuracy
                round(line_data['dist'], 10),
                round(line_data['viol'], 10),
                line_data['violation-file'],
                line_data['structure-file'],
                1,
                line_data['restraint-number'],
                line_data['comment'],
            ])

        frame.add_loop(violation_loop)

    return str(entry)
print(args)

# No file arguments and a terminal on stdin: nothing to process, so explain and bail.
is_tty = sys.stdin.isatty()
if is_tty and not args.files:
    parser.print_help()
    print()
    exit_error("""I require at least 1 argument or input stream with a chemical_shift_list frame""")

entries = []
try:
    if not args.files:
        lines = check_stream()
        if lines:
            # Track where each entry came from for later reporting.
            entries.append((Entry.from_string(lines), '< stdin'))
        else:
            exit_error("Error: input appears to be empty")
    else:
        for file in args.files:
            # '--' is the conventional end-of-options marker, not a file name.
            if file != '--':
                entries.append((Entry.from_file(file), file))
except OSError as e:
    exit_error(f"couldn't open target nef file because {e}")

# Tabulate the frames of every entry that was read.
for entry, file in entries:
    tabulate_frames(entry, file, args)
# print(entry)
# NOTE(review): this `return` closes a function whose `def` lies above this
# excerpt; the 4-space indent assumes it sits directly in that function's body.
    return result


if __name__ == '__main__':
    # Build the CLI parser and read the arguments (chain code, file names, entry name).
    parser = create_parser()
    args = parser.parse_args()

    chain = args.chain_code
    file_name = args.file_names[0]
    # Read the residue sequence for the requested chain from the input file.
    with open(file_name, 'r') as lines:
        sequence = read_sequence(lines=lines, chain_code=args.chain_code)

    # Build a skeleton NEF entry containing a single molecular-system saveframe.
    entry_name = args.entry_name.replace(' ', '_')
    entry = Entry.from_scratch(entry_name)
    category = "nef_molecular_system"
    frame_code = f'{category}_{entry_name}'
    frame = Saveframe.from_scratch(frame_code, category)
    entry.add_saveframe(frame)
    frame.add_tag("sf_category", category)
    frame.add_tag("sf_framecode", frame_code)
    loop = Loop.from_scratch()
    frame.add_loop(loop)
    # Column names for the sequence loop; the loop appears to be populated
    # beyond this excerpt.
    tags = ('index', 'chain_code', 'sequence_code', 'residue_name', 'linking',
            'residue_variant', 'cis_peptide')
def merge_entries(template_entry: pynmrstar.Entry,
                  existing_entry: pynmrstar.Entry,
                  new_schema: pynmrstar.Schema,
                  preserve_entry_information: bool = False):
    """ Merge the saveframes of `existing_entry` into `template_entry`,
    rebuilding each frame from the schema template so only tags/loops known
    to `new_schema` survive.

    By default it does not copy over the entry information - but it should for
    cloned entries, so the preserve_entry_information boolean is available.
    Both entries are mutated in place. """

    existing_entry.normalize()

    # Rename the saveframes in the uploaded entry before merging them
    for category in existing_entry.category_list:
        for x, saveframe in enumerate(_sort_saveframes(existing_entry.get_saveframes_by_category(category))):
            # Set the "Name" tag if it isn't already set
            if (saveframe.tag_prefix + '.name').lower() in new_schema.schema:
                try:
                    # update=False: add_tag raises ValueError if the tag already
                    # exists, which is silently accepted here.
                    saveframe.add_tag('Name',
                                      saveframe['sf_framecode'][0].replace("_", " "),
                                      update=False)
                except ValueError:
                    pass
            # Canonical name: <category>_<ordinal>, 1-based.
            new_name = "%s_%s" % (saveframe.category, x + 1)
            if saveframe.name != new_name:
                existing_entry.rename_saveframe(saveframe.name, new_name)

    for category in existing_entry.category_list:
        # Drop the template's own frames of this category so the uploaded ones
        # replace them (the entry_interview frame is always kept).
        delete_saveframes = template_entry.get_saveframes_by_category(category)
        for saveframe in delete_saveframes:
            if saveframe.category == "entry_interview":
                continue
            del template_entry[saveframe]
        for saveframe in existing_entry.get_saveframes_by_category(category):
            # Don't copy over the entry interview at all
            if saveframe.category == "entry_interview":
                continue
            # If the saveframe isn't in the dictionary, or has some other issue, better to skip it
            # than to crash
            try:
                new_saveframe = pynmrstar.Saveframe.from_template(category,
                                                                  name=saveframe.name,
                                                                  entry_id=template_entry.entry_id,
                                                                  default_values=True,
                                                                  schema=new_schema,
                                                                  all_tags=True)
            except ValueError:
                continue
            frame_prefix_lower = saveframe.tag_prefix.lower()

            # Don't copy the tags from entry_information
            if saveframe.category != "entry_information" or preserve_entry_information:
                for tag in saveframe.tags:
                    lower_tag = tag[0].lower()
                    # Bookkeeping tags are regenerated rather than copied.
                    if lower_tag not in ['sf_category', 'sf_framecode', 'id', 'entry_id',
                                         'nmr_star_version', 'original_nmr_star_version',
                                         'atomic_coordinate_file_name',
                                         'atomic_coordinate_file_syntax',
                                         'constraint_file_name']:
                        fqtn = frame_prefix_lower + '.' + lower_tag
                        # Only copy tags the new schema knows about ('_deleted'
                        # is allowed through regardless).
                        if fqtn in new_schema.schema or lower_tag == '_deleted':
                            new_saveframe.add_tag(tag[0], tag[1], update=True)

            for loop in saveframe.loops:
                # Don't copy the experimental data loops
                if loop.category == "_Upload_data":
                    continue
                lower_tags = [_.lower() for _ in loop.tags]
                try:
                    # Keep only the columns present in both the uploaded loop
                    # and the schema-template loop.
                    tags_to_pull = [_ for _ in new_saveframe[loop.category].tags
                                    if _.lower() in lower_tags]
                # Skip loops that don't exist in the schema used
                except KeyError:
                    continue
                filtered_original_loop = loop.filter(tags_to_pull)
                filtered_original_loop.add_missing_tags(schema=new_schema, all_tags=True)
                new_saveframe[filtered_original_loop.category] = filtered_original_loop

            template_entry.add_saveframe(new_saveframe)

    # Strip off any loop Entry_ID tags from the original entry
    for saveframe in template_entry.frame_list:
        for loop in saveframe:
            for tag in loop.tags:
                fqtn = (loop.category + "." + tag).lower()
                try:
                    tag_schema = new_schema.schema[fqtn]
                    # Columns keyed on _Entry.ID are blanked; they get
                    # reassigned when the entry ID is finalized.
                    if tag_schema['Natural foreign key'] == '_Entry.ID':
                        loop[tag] = [None] * len(loop[tag])
                except KeyError:
                    pass
def deposit(self, final_entry: pynmrstar.Entry) -> int:
    """ Deposits an entry into ETS.

    Validates the deposition, cleans the entry up (unicode transliteration,
    Experiment_name assignment, initials normalization, residue-sequence
    loops), assigns a BMRB ID (reusing an existing one when present,
    otherwise reserving the next free ID in the ETS database), records the
    deposition in ETS, writes the final entry to disk and returns the
    assigned BMRB ID.

    Raises RequestError for user-correctable problems and ServerError for
    configuration or database failures. """

    self.raise_write_errors()
    if not self.metadata['email_validated']:
        raise RequestError('You must validate your e-mail before deposition.')
    contact_emails: List[str] = final_entry.get_loops_by_category(
        "_Contact_Person")[0].get_tag(['Email_address'])
    if self.metadata['author_email'] not in contact_emails:
        raise RequestError(
            'At least one contact person must have the email of the original deposition creator.')
    existing_entry_id = self.get_entry().entry_id
    if existing_entry_id != final_entry.entry_id:
        raise RequestError(
            'Invalid deposited entry. The ID must match that of this deposition.')

    logging.info('Depositing deposition %s' % final_entry.entry_id)

    # Determine which schema version the entry is using
    schema: pynmrstar.Schema = pynmrstar.Schema(
        get_schema(self.metadata['schema_version'], schema_format='xml'))

    # Add tags stripped by the deposition interface
    final_entry.add_missing_tags(schema=schema)

    # We'll use this to assign Experiment_name tags later
    experiment_names: dict = {}
    try:
        experiment_names = dict(
            final_entry.get_loops_by_category('_Experiment')[0].get_tag(['id', 'name']))
    except IndexError:
        pass

    # Assign the PubMed ID
    for citation in final_entry.get_saveframes_by_category('citations'):
        if citation['PubMed_ID'] and citation['PubMed_ID'] != ".":
            update_citation_with_pubmed(citation, schema=schema)

    # Generate any necessary entities from chemcomps
    upgrade_chemcomps_and_create_entities_where_needed(final_entry, schema=schema)

    for saveframe in final_entry:
        # Remove all unicode from the entry
        for tag in saveframe.tag_iterator():
            if isinstance(tag[1], str):
                tag[1] = unidecode.unidecode(tag[1])
                # In case only non-convertible unicode characters were there
                if tag[1] == '':
                    tag[1] = None
        for loop in saveframe.loops:
            for row in loop.data:
                for pos in range(0, len(row)):
                    if isinstance(row[pos], str):
                        row[pos] = unidecode.unidecode(row[pos])
                        # In case only non-convertible unicode characters were there
                        if row[pos] == '':
                            row[pos] = None

            # Set the "Experiment_name" tag from the "Experiment_ID" tag
            if 'Experiment_ID' in loop.tags:
                name_tag_index = loop.tag_index('Experiment_name')
                if name_tag_index is None:
                    loop.add_tag('Experiment_name', update_data=True)
                    name_tag_index = loop.tag_index('Experiment_name')
                id_tag_index = loop.tag_index('Experiment_ID')
                for row in loop.data:
                    if row[id_tag_index] in experiment_names:
                        row[name_tag_index] = experiment_names[row[id_tag_index]]

        # Calculate the tag _Assembly.Number_of_components
        if saveframe.category == 'assembly':
            saveframe.add_tag('_Assembly.Number_of_components',
                              len(saveframe['_Entity_assembly'].data),
                              update=True)

    # Tweak the middle initials (normalize "AB" or "A.B" to "A.B.")
    for loop_cat in [final_entry.get_loops_by_category(x)
                     for x in ['_Contact_person', '_Entry_author', '_Citation_author']]:
        for loop in loop_cat:
            middle_initial_index = loop.tag_index('Middle_initials')
            first_initial_index = loop.tag_index('First_initial')
            for row in loop.data:
                # Fix: compare against None - tag_index() can legitimately
                # return 0, which is falsy and previously skipped column 0.
                if middle_initial_index is not None and row[middle_initial_index]:
                    row[middle_initial_index] = ".".join(
                        row[middle_initial_index].replace(".", "")) + '.'
                # Fix: this branch previously re-processed the middle initials
                # (copy-paste error); it now normalizes the first initial.
                if first_initial_index is not None and row[first_initial_index]:
                    row[first_initial_index] = ".".join(
                        row[first_initial_index].replace(".", "")) + '.'

    # Delete the chemcomps if there is no ligand
    try:
        organic_count = int(final_entry.get_tag('Assembly.Organic_ligands')[0])
    except (ValueError, IndexError, TypeError):
        organic_count = 1
    try:
        metal_count = int(final_entry.get_tag('Assembly.Metal_ions')[0])
    except (ValueError, IndexError, TypeError):
        metal_count = 1
    if metal_count + organic_count == 0:
        for saveframe in final_entry.get_saveframes_by_category('chem_comp'):
            del final_entry[saveframe]

    # Insert the loops for residue sequences
    for entity in final_entry.get_saveframes_by_category('entity'):
        polymer_code: str = entity['Polymer_seq_one_letter_code'][0]
        polymer_type: str = entity['Polymer_type'][0]
        if polymer_code and polymer_code != '.':
            polymer_code = polymer_code.strip().upper().replace(' ', '').replace('\n', '')
            comp_loop = pynmrstar.Loop.from_scratch('_Entity_comp_index')
            comp_loop.add_tag(['_Entity_comp_index.ID',
                               '_Entity_comp_index.Auth_seq_ID',
                               '_Entity_comp_index.Comp_ID',
                               '_Entity_comp_index.Comp_label',
                               '_Entity_comp_index.Entry_ID',
                               '_Entity_comp_index.Entity_ID'])
            # For simple DNA, RNA, and proteins
            if polymer_type in residue_mappings:
                for x, residue in enumerate(polymer_code):
                    comp_loop.data.append([x + 1, None,
                                           residue_mappings[polymer_type].get(residue, 'X'),
                                           None, None, None])
            # If it is something else, it needs to be manually annotated
            else:
                for x, residue in enumerate(polymer_code):
                    comp_loop.data.append([x + 1, None, 'X', None, None, None])
            entity.add_loop(comp_loop)

            polymer_loop = pynmrstar.Loop.from_scratch('_Entity_poly_seq')
            polymer_loop.add_tag(['_Entity_poly_seq.Hetero',
                                  '_Entity_poly_seq.Mon_ID',
                                  '_Entity_poly_seq.Num',
                                  '_Entity_poly_seq.Comp_index_ID',
                                  '_Entity_poly_seq.Entry_ID',
                                  '_Entity_poly_seq.Entity_ID'])
            # For simple DNA, RNA, and proteins
            if polymer_type in residue_mappings:
                for x, residue in enumerate(polymer_code):
                    polymer_loop.data.append([None,
                                              residue_mappings[polymer_type].get(residue, 'X'),
                                              x + 1, x + 1, None, None])
            # If it is something else, it needs to be manually annotated
            else:
                for x, residue in enumerate(polymer_code):
                    # Fix: rows must follow the declared tag order
                    # (Hetero, Mon_ID, Num, Comp_index_ID, Entry_ID, Entity_ID);
                    # the previous layout reused the comp-index column order.
                    polymer_loop.data.append([None, 'X', x + 1, x + 1, None, None])
            entity.add_loop(polymer_loop)

    # Calculate the values needed to insert into ETS
    today_str: str = date.today().isoformat()
    today_date: datetime = datetime.now()

    # Set the accession and submission date
    entry_saveframe: pynmrstar.saveframe = final_entry.get_saveframes_by_category(
        'entry_information')[0]
    entry_saveframe['Submission_date'] = today_str
    entry_saveframe['Accession_date'] = today_str

    # Do final entry normalization
    final_entry.normalize(schema=schema)

    params = {'source': 'Author',
              'submit_type': 'Dep',
              'status': 'nd',
              'lit_search_required': 'N',
              'submission_date': today_str,
              'accession_date': today_str,
              'last_updated': today_str,
              'molecular_system': final_entry['entry_information_1']['Title'][0],
              'onhold_status': 'Pub',
              'restart_id': final_entry.entry_id}

    # Dep_release_code_nmr_exptl was wrongly used in place of Release_request in dictionary versions < 3.2.8.1
    try:
        release_status: str = final_entry['entry_information_1'][
            'Dep_release_code_nmr_exptl'][0].upper()
    except (KeyError, ValueError):
        release_status = final_entry['entry_information_1']['Release_request'][0].upper()

    # Map the requested release policy onto an ETS hold date.
    if release_status == 'RELEASE NOW':
        params['onhold_status'] = today_date.strftime("%m/%d/%y")
    elif release_status == 'HOLD FOR 4 WEEKS':
        params['onhold_status'] = (today_date + relativedelta(weeks=4)).strftime("%m/%d/%y")
    elif release_status == 'HOLD FOR 8 WEEKS':
        params['onhold_status'] = (today_date + relativedelta(weeks=+8)).strftime("%m/%d/%y")
    elif release_status == 'HOLD FOR 6 MONTHS':
        params['onhold_status'] = (today_date + relativedelta(months=+6)).strftime("%m/%d/%y")
    elif release_status == 'HOLD FOR 1 YEAR':
        params['onhold_status'] = (today_date + relativedelta(years=+1)).strftime("%m/%d/%y")
    elif release_status == 'HOLD FOR PUBLICATION':
        params['onhold_status'] = 'Pub'
    else:
        raise ServerError('Invalid release code.')

    contact_loop: pynmrstar.Loop = final_entry.get_loops_by_category("_Contact_Person")[0]
    params['author_email'] = ",".join(contact_loop.get_tag(['Email_address']))
    contact_people = [', '.join(x)
                      for x in contact_loop.get_tag(['Family_name', 'Given_name'])]
    params['contact_person1'] = contact_people[0]
    params['contact_person2'] = contact_people[1]

    ranges = configuration['ets']['deposition_ranges']
    if len(ranges) == 0:
        raise ServerError('Server configuration error.')

    # If they have already deposited, just keep the same BMRB ID
    bmrbnum = self.metadata.get('bmrbnum', None)
    # Debug/dev fallback when no real ETS instance is configured.
    if configuration['debug'] and configuration['ets']['host'] == 'CHANGE_ME' and not bmrbnum:
        bmrbnum = 999999
    if bmrbnum:
        params['bmrbnum'] = bmrbnum
    else:
        try:
            conn = psycopg2.connect(user=configuration['ets']['user'],
                                    host=configuration['ets']['host'],
                                    database=configuration['ets']['database'])
            cur = conn.cursor()
        except psycopg2.OperationalError:
            logging.exception(
                'Could not connect to ETS database. Is the server down, or the configuration wrong?')
            raise ServerError(
                'Could not connect to entry tracking system. Please contact us.')

        try:
            # Determine which bmrbnum to use - one range at a time
            bmrbnum: Optional[int] = None
            for id_range in ranges:
                # Get the existing IDs from ETS
                bmrb_sql: str = 'SELECT bmrbnum FROM entrylog WHERE bmrbnum >= %s AND bmrbnum <= %s;'
                cur.execute(bmrb_sql, [id_range[0], id_range[1]])
                # Calculate the list of valid IDs
                existing_ids: set = set([_[0] for _ in cur.fetchall()])
                # NOTE(review): range() excludes id_range[1] while the SQL uses
                # <=, so the top ID of each range is never assignable - confirm
                # whether the configured ranges are meant to be inclusive.
                ids_in_range: set = set(range(id_range[0], id_range[1]))
                assignable_ids = sorted(list(ids_in_range.difference(existing_ids)))
                # A valid ID has been found in this range
                if len(assignable_ids) > 0:
                    bmrbnum = assignable_ids[0]
                    break
                else:
                    logging.warning(
                        'No valid IDs found in range %d to %d. Continuing to next range...' %
                        (id_range[0], id_range[1]))
            if not bmrbnum:
                logging.exception('No valid IDs remaining in any of the ranges!')
                raise ServerError(
                    'Could not find a valid BMRB ID to assign. Please contact us.')
            params['bmrbnum'] = bmrbnum

            # Create the deposition record
            insert_query = """
INSERT INTO entrylog (depnum, bmrbnum, status, submission_date, accession_date, onhold_status, molecular_system,
                      contact_person1, contact_person2, submit_type, source, lit_search_required, author_email,
                      restart_id, last_updated, nmr_dep_code)
  VALUES (nextval('depnum_seq'), %(bmrbnum)s, %(status)s, %(submission_date)s, %(accession_date)s,
          %(onhold_status)s, %(molecular_system)s, %(contact_person1)s, %(contact_person2)s, %(submit_type)s,
          %(source)s, %(lit_search_required)s, %(author_email)s, %(restart_id)s, %(last_updated)s, %(restart_id)s)"""
            cur.execute(insert_query, params)
            log_sql = """
INSERT INTO logtable (logid,depnum,actdesc,newstatus,statuslevel,logdate,login)
  VALUES (nextval('logid_seq'),currval('depnum_seq'),'NEW DEPOSITION','nd',1,now(),'')"""
            cur.execute(log_sql)
            conn.commit()
        except psycopg2.IntegrityError:
            logging.exception(
                'Could not assign the chosen BMRB ID - it was already assigned.')
            conn.rollback()
            raise ServerError('Could not create deposition. Please try again.')

    # Assign the BMRB ID in all the appropriate places in the entry
    final_entry.entry_id = bmrbnum

    # Write the final deposition to disk
    self.write_file('deposition.str', str(final_entry).encode(), root=True)
    self.metadata['entry_deposited'] = True
    self.metadata['deposition_date'] = datetime.utcnow().strftime("%I:%M %p on %B %d, %Y")
    self.metadata['bmrbnum'] = bmrbnum
    self.metadata['server_version_at_deposition'] = get_release()
    self.commit('Deposition submitted!')

    # Return the assigned BMRB ID
    return bmrbnum
# NOTE(review): the string below closes the help= argument of a
# parser.add_argument(...) call whose opening lies above this excerpt.
    'Alternatively, use this BMRB-formatted file. This overrules the above argument')
parser.add_argument(
    '-o',
    type=str,
    dest='outputPrefix',
    default='expt',
    help=
    'The prefix to all output files. This script will utilise the order and conditions of spin relaxation experiments '
    'in the input to try to write unique outputs.')

args = parser.parse_args()
outPrefix = args.outputPrefix
inputFile = args.inputTextFile
inputID = args.BMRBEntry

# Load the entry either from a local BMRB-formatted file or straight from the
# BMRB database; exactly one of the two inputs must be given.
if not inputFile is None:
    entry = Entry.from_file(inputFile)
elif not inputID is None:
    entry = Entry.from_database(inputID)
else:
    print("= = ERROR: You must give either an BMRB entry or input file!",
          file=sys.stderr)
    parser.print_help()
    sys.exit(1)

# Saveframe categories to extract, their unit tags, and experiment labels,
# kept index-aligned (the NOE category has no unit tag).
listInterestedCategories = [
    'heteronucl_T1_relaxation', 'heteronucl_T2_relaxation', 'heteronucl_NOEs'
]
listUnitsString = ['t1_val_units', 't2_val_units', '']
listTypeExpt = ['R1', 'R2', 'NOE']
def read_args():
    """Build the CLI parser (stored in the module-global `parser`) and
    return the parsed arguments: a target NEF file (-t/--target) and one
    positional assignment file."""
    global parser
    parser = argparse.ArgumentParser(
        description='Assign a NEF file based on output from an assignment in NEF format')
    parser.add_argument('-t', '--target', metavar='TARGET_NEF', type=str,
                        default=None, dest='target',
                        help='target nef file to assign')
    parser.add_argument(metavar='ASSIGNMENT', nargs=1, dest='assignment')
    return parser.parse_args()


if __name__ == '__main__':
    args = read_args()

    # Read the NEF entry that is to be assigned.
    try:
        nef_target_data = Entry.from_file(args.target)
    except OSError as e:
        msg = f"couldn't open target nef file because {e}"
        exit_error(msg)

    # Read the entry holding the residue assignments.
    try:
        res_assign = Entry.from_file(args.assignment[0])
    except OSError as e:
        msg = f"couldn't open residue assignments file because {e}"
        exit_error(msg)

    # Collect the chemical-shift-list frames from the target entry;
    # processing appears to continue beyond this excerpt.
    shift_list_frames = []
    for frame_name in nef_target_data.frame_dict:
        if 'nef_chemical_shift_list' in frame_name:
            shift_list_frames.append(frame_name)
def mars_to_nef(lines, args):
    """ Convert MARS assignment output lines into a NEF entry.

    Each input line is '<type>_<number>' followed by pairs of
    '<pseudo-residue> (<merit>)' fields, where the merit '(F)' marks a fixed
    assignment and other merits are percentages.  `args` supplies the chain
    code (args.chain) and a residue-number offset (args.offset).  Returns a
    pynmrstar Entry holding one ccpn_residue_assignments saveframe. """

    pseudo_residue_re = re.compile('[a-zA-Z@]+_([0-9]+)')

    assignment_sets = {}
    residue_types = {}
    for line_number, line in enumerate(lines):
        line = line.strip()
        fields = line.split()
        residue_type, residue_number = fields[0].split('_')
        residue_number = int(residue_number)
        residue_types[residue_number] = residue_type
        assignment_sets[residue_number] = []

        for assignment_data in chunks(fields[1:], 2):
            residue, raw_merit = assignment_data
            # Fix: guard against a non-matching pseudo-residue. Previously
            # .groups() was called on a possible None (AttributeError) and the
            # follow-up len() != 1 check could never fire since the pattern has
            # exactly one group.
            residue_match = pseudo_residue_re.search(residue)
            if residue_match is None:
                # Fix: f-string so the diagnostics actually interpolate.
                msg = f"""couldn't find residue number in pseudo residue
                          line number: {line_number}
                          line value: {line}
                          expected a pseudo residue of the form <alpha>_<number> alpha a-z, A-Z or @
                       """
                exit_error(msg)
            pseudo_residue_number = int(residue_match.group(1))

            FIXED_MERIT = '(F)'
            fixed = raw_merit == FIXED_MERIT
            if fixed:
                merit = None
            else:
                merit = raw_merit.strip('()')
                try:
                    # Merits are percentages; store as a 0-1 fraction.
                    merit = float(merit) / 100.0
                except ValueError:
                    # Fix: f-string so the diagnostics actually interpolate.
                    msg = f"""couldn't convert merit to float
                              line number: {line_number}
                              line value: {line}
                           """
                    exit_error(msg)

            assignment = Assignment(pseudo_residue_number, fixed, merit)
            assignment_sets[residue_number].append(assignment)

    loop = Loop.from_scratch('ccpn_residue_assignment_default')
    loop.add_tag(['serial', 'chain_code', 'residue_number', 'residue_type',
                  'assignment_serial', 'assignment', 'merit', 'fixed'])

    data = []
    UNUSED_4 = [UNUSED, ] * 4
    chain = args.chain
    for serial, residue_number in enumerate(sorted(assignment_sets)):
        assignments = assignment_sets[residue_number]
        out_residue_number = residue_number + args.offset
        if assignments:
            for assignment_serial, assignment in enumerate(assignments):
                line = [serial, chain, out_residue_number, residue_types[residue_number],
                        assignment_serial, assignment.assignment, assignment.merit,
                        assignment.fixed]
                data.append(line)
        else:
            # Residue with no candidate assignments: pad the per-assignment columns.
            line = [serial, chain, out_residue_number, residue_types[residue_number],
                    *UNUSED_4]
            data.append(line)
    loop.data = data

    save_frame = Saveframe.from_scratch("ccpn_residue_assignments",
                                        "ccpn_residue_assignments")
    FIXED_TAGS = (('ccpn_assignment_program', 'mars'),
                  ('ccpn_assignment_program_version', UNUSED),
                  ('ccpn_assignment_source', UNUSED),
                  ('sf_category', 'ccpn_residue_assignments'),
                  # Fix: 'sf_ftame_code' was a typo; STAR/NEF saveframes use
                  # 'sf_framecode' (cf. the other frames built in this codebase).
                  ('sf_framecode', 'ccpn_residue_assignments_default'))
    for tag, value in FIXED_TAGS:
        save_frame.add_tag(tag, value)
    save_frame.add_loop(loop)

    entry = Entry.from_scratch('test')
    entry.add_saveframe(save_frame)
    return entry