def tmbed(**kwargs) -> Dict[str, Any]:
    """
    Protocol predicts membrane residues from "embeddings_file".
    Embeddings must have been generated with ProtT5-XL-U50.

    :param kwargs: stage parameters; "embeddings_file" and "remapped_sequences_file" are required
    :return: a deep copy of the parameters, extended with the model files that were fetched and
             with "membrane_residues_predictions_file"
    """
    check_required(kwargs, ['embeddings_file', 'remapped_sequences_file'])

    result_kwargs = deepcopy(kwargs)
    file_manager = get_file_manager(**kwargs)

    # Fetch every model file the caller did not supply
    for file_key in TmbedAnnotationExtractor.necessary_files:
        if result_kwargs.get(file_key):
            continue
        result_kwargs[file_key] = get_model_file(model='tmbed', file=file_key)

    extractor = TmbedAnnotationExtractor(**result_kwargs)

    # Create the output file up front: if this fails, now is better than after the predictions ran
    output_path = file_manager.create_file(
        result_kwargs.get('prefix'),
        result_kwargs.get('stage_name'),
        'membrane_residues_predictions_file',
        extension='.fasta',
    )
    result_kwargs['membrane_residues_predictions_file'] = output_path

    predicted_records = []

    with h5py.File(result_kwargs['embeddings_file'], 'r') as embeddings:
        for record in read_fasta(result_kwargs['remapped_sequences_file']):
            # The extractor takes a batch, so wrap the single embedding in a leading batch axis
            # (until batch processing is supported)
            batch = np.array(embeddings[record.id])[np.newaxis]
            batch_lengths = [len(record.seq)]

            # Only a single item is in the batch for now; batch mode would need to deepcopy
            # a different protein sequence per annotation
            for annotation in extractor.get_membrane_residues(batch, batch_lengths):
                labelled = deepcopy(record)
                labelled.seq = Seq(convert_list_of_enum_to_string(annotation.membrane_residues))
                predicted_records.append(labelled)

    # Write file
    write_fasta_file(predicted_records, output_path)

    return result_kwargs
def light_attention(model, **kwargs) -> Dict[str, Any]:
    """
    Protocol extracts subcellular location and membrane boundness from "embeddings_file".
    Embeddings can be generated with ProtBert.

    :param model: either "la_protbert" or "la_prott5". Used to download files
    :param kwargs: stage parameters; "embeddings_file", "mapping_file" and
                   "remapped_sequences_file" are required
    :return: a deep copy of the parameters, extended with the model files that were fetched and
             with "per_sequence_predictions_file"
    """
    required_keys = ['embeddings_file', 'mapping_file', 'remapped_sequences_file']
    check_required(kwargs, required_keys)

    result_kwargs = deepcopy(kwargs)
    file_manager = get_file_manager(**kwargs)

    # Fetch every model file the caller did not supply
    for file_key in LightAttentionAnnotationExtractor.necessary_files:
        if result_kwargs.get(file_key):
            continue
        result_kwargs[file_key] = get_model_file(model=model, file=file_key)

    extractor = LightAttentionAnnotationExtractor(**result_kwargs)

    # The mapping table receives the per-sequence predictions as additional columns
    mapping_table = read_csv(result_kwargs['mapping_file'], index_col=0)

    # Create the output file up front: if this fails, now is better than later
    output_path = file_manager.create_file(
        result_kwargs.get('prefix'),
        result_kwargs.get('stage_name'),
        'per_sequence_predictions_file',
        extension='.csv',
    )
    result_kwargs['per_sequence_predictions_file'] = output_path

    with h5py.File(result_kwargs['embeddings_file'], 'r') as embeddings:
        for record in read_fasta(result_kwargs['remapped_sequences_file']):
            prediction = extractor.get_subcellular_location(np.array(embeddings[record.id]))

            # Per-sequence annotations: subcellular location and membrane boundness
            mapping_table.at[record.id, 'subcellular_location'] = prediction.localization.value
            mapping_table.at[record.id, 'membrane_or_soluble'] = prediction.membrane.value

    # Write file
    mapping_table.to_csv(output_path)

    return result_kwargs
def prott5cons(model: str, **kwargs) -> Dict[str, Any]:
    """
    Protocol extracts conservation from "embeddings_file".
    Embeddings can only be generated with ProtT5-XL-U50.

    :param model: "t5_xl_u50_conservation". Used to download files
    :param kwargs: stage parameters; "embeddings_file", "mapping_file" and
                   "remapped_sequences_file" are required
    :return: a deep copy of the parameters, extended with the model files that were fetched and
             with "conservation_predictions_file"
    """
    required_keys = ['embeddings_file', 'mapping_file', 'remapped_sequences_file']
    check_required(kwargs, required_keys)

    result_kwargs = deepcopy(kwargs)
    file_manager = get_file_manager(**kwargs)

    # Fetch every model file the caller did not supply
    for file_key in ProtT5consAnnotationExtractor.necessary_files:
        if result_kwargs.get(file_key):
            continue
        result_kwargs[file_key] = get_model_file(model=model, file=file_key)

    extractor = ProtT5consAnnotationExtractor(**result_kwargs)

    # NOTE(review): the mapping file is loaded but its content is not used further in this
    # protocol; the read is kept so that a missing/unreadable mapping file still fails here
    mapping_table = read_mapping_file(result_kwargs["mapping_file"])

    # Create the output file up front: if this fails, now is better than later
    output_path = file_manager.create_file(
        result_kwargs.get('prefix'),
        result_kwargs.get('stage_name'),
        'conservation_predictions_file',
        extension='.fasta',
    )
    result_kwargs['conservation_predictions_file'] = output_path

    conservation_records = []

    with h5py.File(result_kwargs['embeddings_file'], 'r') as embeddings:
        for record in read_fasta(result_kwargs['remapped_sequences_file']):
            prediction = extractor.get_conservation(np.array(embeddings[record.id]))

            # Same record metadata, residues replaced by the per-residue conservation labels
            labelled = deepcopy(record)
            labelled.seq = Seq(convert_list_of_enum_to_string(prediction.conservation))
            conservation_records.append(labelled)

    # Write file
    write_fasta_file(conservation_records, output_path)

    return result_kwargs
def predict_annotations_using_basic_models(model, **kwargs) -> Dict[str, Any]:
    """
    Protocol extracts secondary structure (DSSP3 and DSSP8), disorder, subcellular location and
    membrane boundness from "embeddings_file".
    Embeddings can either be generated with SeqVec or ProtBert.

    SeqVec models are used in this publication: https://doi.org/10.1186/s12859-019-3220-8
    ProtTrans models are used in this publication: https://doi.org/10.1101/2020.07.12.199554

    :param model: either "bert_from_publication" or "seqvec_from_publication". Used to download files
    :param kwargs: stage parameters; "embeddings_file", "mapping_file" and
                   "remapped_sequences_file" are required
    :return: a deep copy of the parameters, extended with the checkpoint files that were fetched
             and with the paths of the four prediction files
    """
    check_required(kwargs, ['embeddings_file', 'mapping_file', 'remapped_sequences_file'])

    checkpoint_keys = [
        'secondary_structure_checkpoint_file',
        'subcellular_location_checkpoint_file',
    ]

    result_kwargs = deepcopy(kwargs)
    file_manager = get_file_manager(**kwargs)

    # Fetch every checkpoint the caller did not supply
    for file_key in checkpoint_keys:
        if result_kwargs.get(file_key):
            continue
        result_kwargs[file_key] = get_model_file(model=f'{model}_annotations_extractors', file=file_key)

    extractor = BasicAnnotationExtractor(model, **result_kwargs)

    # The mapping table receives the per-sequence predictions as additional columns
    mapping_table = read_csv(result_kwargs['mapping_file'], index_col=0)

    # Create all output files up front: if this fails, now is better than later
    output_paths = {}
    for file_key, extension in (
            ('DSSP3_predictions_file', '.fasta'),
            ('DSSP8_predictions_file', '.fasta'),
            ('disorder_predictions_file', '.fasta'),
            ('per_sequence_predictions_file', '.csv'),
    ):
        output_paths[file_key] = file_manager.create_file(
            result_kwargs.get('prefix'), result_kwargs.get('stage_name'), file_key, extension=extension)
        result_kwargs[file_key] = output_paths[file_key]

    # One container of labelled records per per-residue annotation
    per_residue_records = {'DSSP3': [], 'DSSP8': [], 'disorder': []}

    with h5py.File(result_kwargs['embeddings_file'], 'r') as embeddings:
        for record in read_fasta(result_kwargs['remapped_sequences_file']):
            prediction = extractor.get_annotations(np.array(embeddings[record.id]))

            # Per-AA annotations: DSSP3, DSSP8 and disorder
            for annotation_name, records in per_residue_records.items():
                labelled = deepcopy(record)
                labelled.seq = Seq(convert_list_of_enum_to_string(getattr(prediction, annotation_name)))
                records.append(labelled)

            # Per-sequence annotations: subcellular location and membrane boundness
            mapping_table.at[record.id, 'subcellular_location'] = prediction.localization.value
            mapping_table.at[record.id, 'membrane_or_soluble'] = prediction.membrane.value

    # Write files
    mapping_table.to_csv(output_paths['per_sequence_predictions_file'])
    write_fasta_file(per_residue_records['DSSP3'], output_paths['DSSP3_predictions_file'])
    write_fasta_file(per_residue_records['DSSP8'], output_paths['DSSP8_predictions_file'])
    write_fasta_file(per_residue_records['disorder'], output_paths['disorder_predictions_file'])

    return result_kwargs
def predict_annotations_using_basic_models(model: str, **kwargs) -> Dict[str, Any]:
    """
    Protocol extracts secondary structure (DSSP3 and DSSP8), disorder, subcellular location and
    membrane boundness from "embeddings_file".
    Embeddings can either be generated with SeqVec or ProtBert.

    SeqVec models are used in this publication: https://doi.org/10.1186/s12859-019-3220-8
    ProtTrans models are used in this publication: https://doi.org/10.1101/2020.07.12.199554

    If the optional "get_activations" parameter is truthy, the raw activations behind the DSSP3,
    DSSP8 and disorder predictions are additionally written to one CSV file each.

    :param model: either "bert_from_publication" or "seqvec_from_publication". Used to download files
    :param kwargs: stage parameters; "embeddings_file", "mapping_file" and
                   "remapped_sequences_file" are required
    :return: a deep copy of the parameters, extended with the paths of all files that were created
    """
    check_required(kwargs, ['embeddings_file', 'mapping_file', 'remapped_sequences_file'])

    result_kwargs = deepcopy(kwargs)
    file_manager = get_file_manager(**kwargs)

    annotation_extractor = BasicAnnotationExtractor(model, **result_kwargs)

    # mapping file will be needed for protein-wide annotations
    mapping_file = read_mapping_file(result_kwargs["mapping_file"])

    get_activations = bool(kwargs.get('get_activations'))

    def create_output_file(file_key: str, extension: str):
        """Create the output file for *file_key*, record its path in result_kwargs and return it."""
        path = file_manager.create_file(
            result_kwargs.get('prefix'), result_kwargs.get('stage_name'), file_key, extension=extension)
        result_kwargs[file_key] = path
        return path

    def raw_activations_frame(raw_activations, class_names, sequence_id):
        """Build the activation table of one sequence: seqID, 1-based residue number, one column per class."""
        # NOTE(review): presumably the tensor is (classes, residues, 1), so that the transposed
        # slice has one row per residue - confirm against BasicAnnotationExtractor
        frame = DataFrame(raw_activations[:, :, 0].detach().cpu().numpy().transpose(), columns=class_names)
        frame.insert(0, 'residue', range(1, 1 + len(frame)))
        frame.insert(0, 'seqID', sequence_id)
        return frame

    # Try to create final files (if this fails, now is better than later)
    DSSP3_predictions_file_path = create_output_file('DSSP3_predictions_file', '.fasta')
    DSSP8_predictions_file_path = create_output_file('DSSP8_predictions_file', '.fasta')
    disorder_predictions_file_path = create_output_file('disorder_predictions_file', '.fasta')
    per_sequence_predictions_file_path = create_output_file('per_sequence_predictions_file', '.csv')

    if get_activations:
        DSSP3_raw_predictions_file_path = create_output_file('DSSP3_raw_predictions_file', '.csv')
        DSSP8_raw_predictions_file_path = create_output_file('DSSP8_raw_predictions_file', '.csv')
        disorder_raw_predictions_file_path = create_output_file('disorder_raw_predictions_file', '.csv')

    # Create sequence containers
    DSSP3_sequences = []
    DSSP8_sequences = []
    disorder_sequences = []
    DSSP3_raw = []
    DSSP8_raw = []
    disorder_raw = []

    with h5py.File(result_kwargs['embeddings_file'], 'r') as embedding_file:
        for protein_sequence in read_fasta(result_kwargs['remapped_sequences_file']):
            # Per-AA annotations: DSSP3, DSSP8 and disorder
            embedding = np.array(embedding_file[protein_sequence.id])
            annotations = annotation_extractor.get_annotations(embedding)

            DSSP3_sequence = deepcopy(protein_sequence)
            DSSP3_sequence.seq = Seq(convert_list_of_enum_to_string(annotations.DSSP3))
            DSSP3_sequences.append(DSSP3_sequence)

            DSSP8_sequence = deepcopy(protein_sequence)
            DSSP8_sequence.seq = Seq(convert_list_of_enum_to_string(annotations.DSSP8))
            DSSP8_sequences.append(DSSP8_sequence)

            disorder_sequence = deepcopy(protein_sequence)
            disorder_sequence.seq = Seq(convert_list_of_enum_to_string(annotations.disorder))
            disorder_sequences.append(disorder_sequence)

            # The activation tables are only ever written when requested, so do not pay for the
            # tensor-to-DataFrame conversion of every sequence otherwise
            if get_activations:
                DSSP3_raw.append(raw_activations_frame(
                    annotations.DSSP3_raw, ['H', 'E', 'C'], protein_sequence.id))
                DSSP8_raw.append(raw_activations_frame(
                    annotations.DSSP8_raw, ['G', 'H', 'I', 'B', 'E', 'S', 'T', 'C'], protein_sequence.id))
                disorder_raw.append(raw_activations_frame(
                    annotations.disorder_raw, ['Order', 'Disorder'], protein_sequence.id))

            # Per-sequence annotations, e.g. subcell loc & membrane boundness
            mapping_file.at[protein_sequence.id, 'subcellular_location'] = annotations.localization.value
            mapping_file.at[protein_sequence.id, 'membrane_or_soluble'] = annotations.membrane.value

    # Write files
    mapping_file.to_csv(per_sequence_predictions_file_path)
    write_fasta_file(DSSP3_sequences, DSSP3_predictions_file_path)
    write_fasta_file(DSSP8_sequences, DSSP8_predictions_file_path)
    write_fasta_file(disorder_sequences, disorder_predictions_file_path)

    if get_activations:
        # create files with activations for each multiclass prediction
        for raw_frames, raw_path in (
                (DSSP3_raw, DSSP3_raw_predictions_file_path),
                (DSSP8_raw, DSSP8_raw_predictions_file_path),
                (disorder_raw, disorder_raw_predictions_file_path),
        ):
            concatenate_dataframe(raw_frames).set_index('seqID').rename_axis(None).to_csv(raw_path)

    return result_kwargs
def bindembed21(**kwargs) -> Dict[str, Any]:
    """
    Protocol extracts binding residues from "alignment_results_file" if possible, and from
    "embeddings_file" otherwise.

    For every sequence the best alignment hit (lowest E-value below 1E-3, ties broken by highest
    "fident") is used for homology-based inference (HBI). If the sequence has no such hit, or
    the inference contains no binding residue at all, the embedding-based predictor
    (bindEmbed21DL) is used instead.

    :param kwargs: stage parameters; "alignment_results_file", "embeddings_file", "mapping_file"
                   and "remapped_sequences_file" are required
    :return: a deep copy of the parameters, extended with the model files/directories that were
             fetched and with "metal_binding_predictions_file",
             "nucleic_acid_binding_predictions_file" and "small_molecule_binding_predictions_file"
    """
    check_required(kwargs, ['alignment_results_file', 'embeddings_file', 'mapping_file', 'remapped_sequences_file'])

    result_kwargs = deepcopy(kwargs)
    file_manager = get_file_manager(**kwargs)

    # Download necessary files if needed
    # for HBI
    for directory in BindEmbed21HBIAnnotationExtractor.necessary_directories:
        if not result_kwargs.get(directory):
            result_kwargs[directory] = get_model_directories_from_zip(model="bindembed21hbi", directory=directory)
    # for DL
    for file in BindEmbed21DLAnnotationExtractor.necessary_files:
        if not result_kwargs.get(file):
            result_kwargs[file] = get_model_file(model="bindembed21dl", file=file)

    hbi_extractor = BindEmbed21HBIAnnotationExtractor(**result_kwargs)
    dl_extractor = BindEmbed21DLAnnotationExtractor(**result_kwargs)

    # Try to create final files (if this fails, now is better than later)
    metal_binding_predictions_file_path = file_manager.create_file(
        result_kwargs.get('prefix'), result_kwargs.get('stage_name'),
        'metal_binding_predictions_file', extension='.fasta')
    result_kwargs['metal_binding_predictions_file'] = metal_binding_predictions_file_path

    nuc_binding_predictions_file_path = file_manager.create_file(
        result_kwargs.get('prefix'), result_kwargs.get('stage_name'),
        'nucleic_acid_binding_predictions_file', extension='.fasta')
    result_kwargs['nucleic_acid_binding_predictions_file'] = nuc_binding_predictions_file_path

    small_binding_predictions_file_path = file_manager.create_file(
        result_kwargs.get('prefix'), result_kwargs.get('stage_name'),
        'small_molecule_binding_predictions_file', extension='.fasta')
    result_kwargs['small_molecule_binding_predictions_file'] = small_binding_predictions_file_path

    # Legacy key: it used to be assigned for both files above, so it ended up pointing at the
    # small-molecule file. Kept with that value for callers that still read it.
    result_kwargs['binding_residue_predictions_file'] = small_binding_predictions_file_path

    metal_sequences = []
    nuc_sequences = []
    small_sequences = []

    alignment_results = read_csv(result_kwargs['alignment_results_file'], sep='\t',
                                 dtype={'query': 'str', 'target': 'str'})
    # Only hits below the E-value cutoff are considered for inference
    alignment_results = alignment_results[alignment_results['eval'] < 1E-3].copy()

    with h5py.File(result_kwargs['embeddings_file'], 'r') as embedding_file:
        for protein_sequence in read_fasta(result_kwargs['remapped_sequences_file']):
            # Exact comparison: a regex match would also accept queries that merely start with
            # this id and would interpret regex metacharacters contained in it
            hits = alignment_results[alignment_results['query'] == str(protein_sequence.id)]

            # (metal, nucleic acids, small molecules) label strings; None until a method delivered
            binding_labels = None

            if not hits.empty:
                # get HBI hit for this query: minimal E-value, then maximal PIDE
                hits_min_eval = hits[hits['eval'] == hits['eval'].min()]
                hit_max_pide = hits_min_eval[hits_min_eval['fident'] == hits_min_eval['fident'].max()]

                hbi_annotations = hbi_extractor.get_binding_residues(hit_max_pide.iloc[0].to_dict())
                metal_inference = convert_list_of_enum_to_string(hbi_annotations.metal_ion)
                nuc_inference = convert_list_of_enum_to_string(hbi_annotations.nucleic_acids)
                small_inference = convert_list_of_enum_to_string(hbi_annotations.small_molecules)

                # some part of the sequence was predicted using HBI --> save output and don't run DL method
                if 'M' in metal_inference or 'N' in nuc_inference or 'S' in small_inference:
                    binding_labels = (metal_inference, nuc_inference, small_inference)

            # no hit, or no inference containing binding annotations was made --> run bindEmbed21DL
            if binding_labels is None:
                embedding = np.array(embedding_file[protein_sequence.id])
                annotations = dl_extractor.get_binding_residues(embedding)
                binding_labels = (
                    convert_list_of_enum_to_string(annotations.metal_ion),
                    convert_list_of_enum_to_string(annotations.nucleic_acids),
                    convert_list_of_enum_to_string(annotations.small_molecules),
                )

            for labels, container in zip(binding_labels, (metal_sequences, nuc_sequences, small_sequences)):
                labelled_sequence = deepcopy(protein_sequence)
                labelled_sequence.seq = Seq(labels)
                container.append(labelled_sequence)

    # Write files
    write_fasta_file(metal_sequences, metal_binding_predictions_file_path)
    write_fasta_file(nuc_sequences, nuc_binding_predictions_file_path)
    write_fasta_file(small_sequences, small_binding_predictions_file_path)

    return result_kwargs
def bindembed21hbi(**kwargs) -> Dict[str, Any]:
    """
    Protocol extracts binding residues from "alignment_results_file".

    For every sequence the best alignment hit (lowest E-value below 1E-3, ties broken by highest
    "fident") is used for homology-based inference.

    :param kwargs: stage parameters; "alignment_results_file", "mapping_file" and
                   "remapped_sequences_file" are required
    :return: a deep copy of the parameters, extended with the model directories that were fetched
             and with "metal_binding_inference_file", "nucleic_acid_binding_inference_file" and
             "small_molecule_binding_inference_file"
    :raises ValueError: if a sequence has no alignment hit below the E-value cutoff
    """
    check_required(kwargs, ['alignment_results_file', 'mapping_file', 'remapped_sequences_file'])

    result_kwargs = deepcopy(kwargs)
    file_manager = get_file_manager(**kwargs)

    # Download necessary files if needed
    for directory in BindEmbed21HBIAnnotationExtractor.necessary_directories:
        if not result_kwargs.get(directory):
            result_kwargs[directory] = get_model_directories_from_zip(model="bindembed21hbi", directory=directory)

    annotation_extractor = BindEmbed21HBIAnnotationExtractor(**result_kwargs)

    # Try to create final files (if this fails, now is better than later)
    metal_binding_predictions_file_path = file_manager.create_file(
        result_kwargs.get('prefix'), result_kwargs.get('stage_name'),
        'metal_binding_inference_file', extension='.fasta')
    result_kwargs['metal_binding_inference_file'] = metal_binding_predictions_file_path

    nuc_binding_predictions_file_path = file_manager.create_file(
        result_kwargs.get('prefix'), result_kwargs.get('stage_name'),
        'nucleic_acid_binding_inference_file', extension='.fasta')
    result_kwargs['nucleic_acid_binding_inference_file'] = nuc_binding_predictions_file_path

    small_binding_predictions_file_path = file_manager.create_file(
        result_kwargs.get('prefix'), result_kwargs.get('stage_name'),
        'small_molecule_binding_inference_file', extension='.fasta')
    result_kwargs['small_molecule_binding_inference_file'] = small_binding_predictions_file_path

    # Legacy key: it used to be assigned for both files above, so it ended up pointing at the
    # small-molecule file. Kept with that value for callers that still read it.
    result_kwargs['binding_residue_inference_file'] = small_binding_predictions_file_path

    metal_sequences = []
    nuc_sequences = []
    small_sequences = []

    alignment_results = read_csv(result_kwargs['alignment_results_file'], sep='\t',
                                 dtype={'query': 'str', 'target': 'str'})
    # Only hits below the E-value cutoff are considered for inference
    alignment_results = alignment_results[alignment_results['eval'] < 1E-3].copy()

    for protein_sequence in read_fasta(result_kwargs['remapped_sequences_file']):
        # get hits for this query. Exact comparison: a regex match would also accept queries that
        # merely start with this id and would interpret regex metacharacters contained in it
        hits = alignment_results[alignment_results['query'] == str(protein_sequence.id)]

        if hits.empty:
            # Previously this surfaced as "min() arg is an empty sequence"; same exception type,
            # but now it names the sequence and the reason
            raise ValueError(
                f"No alignment hit with E-value < 1E-3 found for sequence '{protein_sequence.id}'; "
                f"homology-based inference of binding residues is not possible.")

        # get hits with minimal E-value
        hits_min_eval = hits[hits['eval'] == hits['eval'].min()]
        # get hit with maximal PIDE
        hit_max_pide = hits_min_eval[hits_min_eval['fident'] == hits_min_eval['fident'].max()]

        annotations = annotation_extractor.get_binding_residues(hit_max_pide.iloc[0].to_dict())

        for binding_annotation, container in (
                (annotations.metal_ion, metal_sequences),
                (annotations.nucleic_acids, nuc_sequences),
                (annotations.small_molecules, small_sequences),
        ):
            labelled_sequence = deepcopy(protein_sequence)
            labelled_sequence.seq = Seq(convert_list_of_enum_to_string(binding_annotation))
            container.append(labelled_sequence)

    # Write files
    write_fasta_file(metal_sequences, metal_binding_predictions_file_path)
    write_fasta_file(nuc_sequences, nuc_binding_predictions_file_path)
    write_fasta_file(small_sequences, small_binding_predictions_file_path)

    return result_kwargs
def bindembed21dl(**kwargs) -> Dict[str, Any]:
    """
    Protocol extracts binding residues from "embeddings_file".
    Results guaranteed only with ProtT5-XL-U50 embeddings.

    :param kwargs: stage parameters; "embeddings_file", "mapping_file" and
                   "remapped_sequences_file" are required
    :return: a deep copy of the parameters, extended with the model files that were fetched and
             with "metal_binding_predictions_file", "nucleic_acid_binding_predictions_file" and
             "small_molecule_binding_predictions_file"
    """
    check_required(kwargs, ['embeddings_file', 'mapping_file', 'remapped_sequences_file'])

    result_kwargs = deepcopy(kwargs)
    file_manager = get_file_manager(**kwargs)

    # Download necessary files if needed
    for file in BindEmbed21DLAnnotationExtractor.necessary_files:
        if not result_kwargs.get(file):
            result_kwargs[file] = get_model_file(model="bindembed21dl", file=file)

    annotation_extractor = BindEmbed21DLAnnotationExtractor(**result_kwargs)

    # Try to create final files (if this fails, now is better than later)
    metal_binding_predictions_file_path = file_manager.create_file(
        result_kwargs.get('prefix'), result_kwargs.get('stage_name'),
        'metal_binding_predictions_file', extension='.fasta')
    result_kwargs['metal_binding_predictions_file'] = metal_binding_predictions_file_path

    nuc_binding_predictions_file_path = file_manager.create_file(
        result_kwargs.get('prefix'), result_kwargs.get('stage_name'),
        'nucleic_acid_binding_predictions_file', extension='.fasta')
    result_kwargs['nucleic_acid_binding_predictions_file'] = nuc_binding_predictions_file_path

    small_binding_predictions_file_path = file_manager.create_file(
        result_kwargs.get('prefix'), result_kwargs.get('stage_name'),
        'small_molecule_binding_predictions_file', extension='.fasta')
    result_kwargs['small_molecule_binding_predictions_file'] = small_binding_predictions_file_path

    # Legacy key: it used to be assigned for both files above, so it ended up pointing at the
    # small-molecule file. Kept with that value for callers that still read it.
    result_kwargs['binding_residue_predictions_file'] = small_binding_predictions_file_path

    metal_sequences = []
    nuc_sequences = []
    small_sequences = []

    with h5py.File(result_kwargs['embeddings_file'], 'r') as embedding_file:
        for protein_sequence in read_fasta(result_kwargs['remapped_sequences_file']):
            embedding = np.array(embedding_file[protein_sequence.id])
            annotations = annotation_extractor.get_binding_residues(embedding)

            # One labelled copy of the record per ligand class
            for binding_annotation, container in (
                    (annotations.metal_ion, metal_sequences),
                    (annotations.nucleic_acids, nuc_sequences),
                    (annotations.small_molecules, small_sequences),
            ):
                labelled_sequence = deepcopy(protein_sequence)
                labelled_sequence.seq = Seq(convert_list_of_enum_to_string(binding_annotation))
                container.append(labelled_sequence)

    # Write files
    write_fasta_file(metal_sequences, metal_binding_predictions_file_path)
    write_fasta_file(nuc_sequences, nuc_binding_predictions_file_path)
    write_fasta_file(small_sequences, small_binding_predictions_file_path)

    return result_kwargs