def copy_source_data_files(self):
    """Fill the external-data directory from the bundled ABL1 resource files.

    A subdirectory of ``external_data_dirpath`` is created when missing for
    each data source, and the matching installed resource is placed in it:

    * ``uniprot-search-abl1.xml.gz``       -> ``UniProt/uniprot-search.xml`` (decompressed)
    * ``gene2pubmed-abl1.gz``              -> ``NCBI_Gene/gene2pubmed.gz`` (copied as-is)
    * ``BindingDB-abl1.tab.gz``            -> ``BindingDB/BindingDB_All.tab`` (decompressed)
    * ``cbioportal-mutations-abl1.xml.gz`` -> ``cBioPortal/cbioportal-mutations.xml`` (decompressed)
    * ``oncotator-data-abl1.json.gz``      -> ``cBioPortal/oncotator-data.json.gz`` (copied as-is)

    Existing destination files are overwritten. Returns None.
    """
    def resource(filename):
        # Resolve a file that lives under the installed 'resources' directory.
        return get_installed_resource_filepath(os.path.join('resources', filename))

    def ensure_subdir(name):
        # Create external_data_dirpath/<name> if it is not there yet.
        dirpath = os.path.join(external_data_dirpath, name)
        if not os.path.exists(dirpath):
            os.mkdir(dirpath)
        return dirpath

    def gunzip(src_filepath, dest_filepath):
        # gzip.open() yields bytes, so the destination must be opened in
        # binary mode; a text-mode handle rejects bytes on Python 3.
        # copyfileobj streams in chunks instead of loading the whole file.
        with gzip.open(src_filepath, 'rb') as src_file, \
                open(dest_filepath, 'wb') as dest_file:
            shutil.copyfileobj(src_file, dest_file)

    uniprot_ref_filepath = resource('uniprot-search-abl1.xml.gz')
    uniprot_dirpath = ensure_subdir('UniProt')
    gunzip(uniprot_ref_filepath, os.path.join(uniprot_dirpath, 'uniprot-search.xml'))

    gene2pubmed_ref_filepath = resource('gene2pubmed-abl1.gz')
    ncbi_gene_dirpath = ensure_subdir('NCBI_Gene')
    shutil.copy(gene2pubmed_ref_filepath, os.path.join(ncbi_gene_dirpath, 'gene2pubmed.gz'))

    bindingdb_ref_filepath = resource('BindingDB-abl1.tab.gz')
    bindingdb_dirpath = ensure_subdir('BindingDB')
    gunzip(bindingdb_ref_filepath, os.path.join(bindingdb_dirpath, 'BindingDB_All.tab'))

    cbioportal_ref_filepath = resource('cbioportal-mutations-abl1.xml.gz')
    cbioportal_dirpath = ensure_subdir('cBioPortal')
    gunzip(cbioportal_ref_filepath, os.path.join(cbioportal_dirpath, 'cbioportal-mutations.xml'))

    # The oncotator data shares the cBioPortal directory created just above
    # and is kept compressed.
    oncotator_ref_filepath = resource('oncotator-data-abl1.json.gz')
    shutil.copy(oncotator_ref_filepath, os.path.join(cbioportal_dirpath, 'oncotator-data.json.gz'))
def copy_source_data_files(self):
    """Install the bundled ABL1 reference data under ``external_data_dirpath``.

    Every entry of the transfer table below names a destination subdirectory
    (created if it does not exist), a file under the installed ``resources``
    directory, the destination filename, and whether the resource is
    gunzipped on the way or copied unchanged. Existing destination files are
    overwritten. Returns None.
    """
    # (subdirectory, bundled resource, destination filename, decompress?)
    transfers = (
        ('UniProt', 'uniprot-search-abl1.xml.gz', 'uniprot-search.xml', True),
        ('NCBI_Gene', 'gene2pubmed-abl1.gz', 'gene2pubmed.gz', False),
        ('BindingDB', 'BindingDB-abl1.tab.gz', 'BindingDB_All.tab', True),
        ('cBioPortal', 'cbioportal-mutations-abl1.xml.gz', 'cbioportal-mutations.xml', True),
        ('cBioPortal', 'oncotator-data-abl1.json.gz', 'oncotator-data.json.gz', False),
    )

    for subdir, resource_name, dest_name, decompress in transfers:
        src_filepath = get_installed_resource_filepath(
            os.path.join('resources', resource_name)
        )

        dest_dirpath = os.path.join(external_data_dirpath, subdir)
        if not os.path.exists(dest_dirpath):
            os.mkdir(dest_dirpath)
        dest_filepath = os.path.join(dest_dirpath, dest_name)

        if not decompress:
            shutil.copy(src_filepath, dest_filepath)
            continue

        # gzip.open() produces bytes, so the output handle must be binary:
        # writing bytes to a text-mode file raises TypeError on Python 3.
        # copyfileobj streams the data rather than reading it all at once.
        with gzip.open(src_filepath, 'rb') as src_file, \
                open(dest_filepath, 'wb') as dest_file:
            shutil.copyfileobj(src_file, dest_file)
def test_gather_pdb():
    """Run GatherPDB on the bundled structures and inspect the first PDB chain.

    Within a project context set up to the 'uniprot' stage, GatherPDB is
    pointed at the installed ``resources/structures`` path; the first
    PDBChain returned by the query must then carry a non-None
    ``observed_seq_aln_exp``.
    """
    with projecttest_context(set_up_project_stage='uniprot'):
        structures_relpath = os.path.join('resources', 'structures')
        structure_dirs = get_installed_resource_filepath(structures_relpath)
        GatherPDB(structure_dirs=structure_dirs)

        chain = models.PDBChain.query.first()
        assert chain.observed_seq_aln_exp is not None
def test_gather_pdb():
    """Check that GatherPDB leaves ``observed_seq_aln_exp`` set on the first chain.

    The project context is set up to the 'uniprot' stage, GatherPDB is given
    the installed ``resources/structures`` path, and the first PDBChain from
    the query is examined.
    """
    with projecttest_context(set_up_project_stage='uniprot'):
        GatherPDB(
            structure_dirs=get_installed_resource_filepath(
                os.path.join('resources', 'structures')
            )
        )
        observed_alignment = models.PDBChain.query.first().observed_seq_aln_exp
        assert observed_alignment is not None
def test_retrieve_uniprot_matches_reference():
    """Compare the retrieved ABL1_HUMAN entry name with the bundled reference.

    The XML returned by ``retrieve_uniprot('mnemonic:ABL1_HUMAN')`` and the
    gzipped reference file ``resources/uniprot-search-abl1.xml.gz`` are both
    parsed with ``xml_parser``; the text of their ``entry/name`` elements
    must be equal.
    """
    def entry_name_of(root):
        # Text of the entry/name element found from this root.
        return root.find('entry/name').text

    retrieved_root = etree.fromstring(retrieve_uniprot('mnemonic:ABL1_HUMAN'), xml_parser)

    reference_relpath = os.path.join('resources', 'uniprot-search-abl1.xml.gz')
    with gzip.open(get_installed_resource_filepath(reference_relpath)) as reference_file:
        reference_root = etree.parse(reference_file, xml_parser)

    assert entry_name_of(retrieved_root) == entry_name_of(reference_root)
def test_retrieve_uniprot_matches_reference():
    """Assert the retrieved and reference UniProt XML agree on ``entry/name``.

    One tree comes from ``retrieve_uniprot('mnemonic:ABL1_HUMAN')``, the
    other from the installed ``resources/uniprot-search-abl1.xml.gz``; both
    are parsed with ``xml_parser``.
    """
    retrieved_xml = retrieve_uniprot('mnemonic:ABL1_HUMAN')
    retrieved_tree = etree.fromstring(retrieved_xml, xml_parser)

    reference_path = get_installed_resource_filepath(
        os.path.join('resources', 'uniprot-search-abl1.xml.gz')
    )
    with gzip.open(reference_path) as reference_handle:
        reference_tree = etree.parse(reference_handle, xml_parser)

    # Retrieved tree first, reference second, each looked up the same way.
    retrieved_name, reference_name = [
        tree.find('entry/name').text
        for tree in (retrieved_tree, reference_tree)
    ]
    assert retrieved_name == reference_name
def test_extract_sifts_seq(): sifts_filepath = get_installed_resource_filepath(os.path.join( 'resources', '4L00.xml.gz' )) seq = 'MQYLNIKEDCNAMAFCAKMRSSKKTEVNLEAPEPGVEVIFYLSDREPLRLGSGEYTAEEL\ CIRAAQACRISPLCHNLFALYDENTKLWYAPNRTITVDDKMSLRLHYRMRFYFTNWHGTN\ DNEQSVWRHSPKKQKNGYEKKKIPDATPLLDASSLEYLFAQGQYDLVKCLAPIRDPKTEQ\ DGHDIENECLGMAVLAISHYAMMKKMQLPELPKDISYKRYIPETLNKSIRQRNLLTRMRI\ NNVFKDFLKEFNNKTICDSSVSTHDLKVKYLATLETLTKHYGAEIFETSMLLISSENEMN\ WFHSNDGGNVLYYEVMVTGNLGIQWRHKPNVVSVEKEKNKLKRKKLENKHKKDEEKNKIR\ EEWNNFSYFPEITHIVIKESVVSINKQDNKKMELKLSSHEEALSFVSLVDGYFRLTADAH\ HYLCTDVAPPLIVHNIQNGCHGPICTEYAINKLRQEGSEEGMYVLRWSCTDFDNILMTVT\ CFEKSEQVQGAQKQFKNFQIEVQKGRYSLHGSDRSFPSLGDLMSHLKKQILRTDNISFML\ KRCCQPKPREISNLLVATKKAQEWQPVYPMSQLSFDRILKKDLVQGEHLGRGTRTHIYSG\ TLMDYKDDEGTSEEKKIKVILKVLDPSHRDISLAFFEAASMMRQVSHKHIVYLYGVCVRD\ VENIMVEEFVEGGPLDLFMHRKSDVLTTPWKFKVAKQLASALSYLEDKDLVHGNVCTKNL\ LLAREGIDSECGPFIKLSDPGIPITVLSRQECIERIPWIAPECVEDSKNLSVAADKWSFG\ TTLWEICYNGEIPLKDKTLIEKERFYESRCRPVTPSCKELADLMTRCMNYDPNQRPFFRA\ IMRDINKLEEQNPDIVSEKKPATEVDPTHFEKRFLKRIRDLGEGHFGKVELCRYDPEGDN\ TGEQVAVKSLKPESGGNHIADLKKEIEILRNLYHENIVKYKGICTEDGGNGIKLIMEFLP\ SGSLKEYLPKNKNKINLKQQLKYAVQICKGMDYLGSRQYVHRDLAARNVLVESEHQVKIG\ DFGLTKAIETDKEYYTVKDDRDSPVFWYAPECLMQSKFYIASDVWSFGVTLHELLTYCDS\ DSSPMALFLKMIGPTHGQMTVTRLVNTLKEGKRLPCPPNCPDEVYQLMRKCWEFQPSNRT\ SFQNLIEGFEALLK' pdb_chain_obj = extract_sifts_seq(sifts_filepath, 'P23458', 'JAK1_HUMAN', '4L00', 'A', seq) assert pdb_chain_obj['experimental_seq_aln_conflicts'] == '--------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ 
---------------------------------------------------gstsAQEWQPVYPMSQLSFD\ RILKKDLVQGEHLGRGTRTHIYSGTLMDYKDDEGTSEEKKIKVILKVLDPSHRDISLAFFEAASMMRQVSH\ KHIVYLYGVCVRDVENIMVEEFVEGGPLDLFMHRKSDVLTTPWKFKVAKQLASALSYLEDKDLVHGNVCTK\ NLLLAREGIDSECGPFIKLSDPGIPITVLSRQECIERIPWIAPECVEDSKNLSVAADKWSFGTTLWEICYN\ GEIPLKDKTLIEKERFYESRCRPVTPSCKELADLMTRCMNYDPNQRPFFRAIMRDINKLEEQNPDIVSEKK\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ ----------' assert pdb_chain_obj['observed_seq_aln'] == '-----------------------\
def test_extract_sifts_seq(): sifts_filepath = get_installed_resource_filepath( os.path.join('resources', '4L00.xml.gz')) seq = 'MQYLNIKEDCNAMAFCAKMRSSKKTEVNLEAPEPGVEVIFYLSDREPLRLGSGEYTAEEL\ CIRAAQACRISPLCHNLFALYDENTKLWYAPNRTITVDDKMSLRLHYRMRFYFTNWHGTN\ DNEQSVWRHSPKKQKNGYEKKKIPDATPLLDASSLEYLFAQGQYDLVKCLAPIRDPKTEQ\ DGHDIENECLGMAVLAISHYAMMKKMQLPELPKDISYKRYIPETLNKSIRQRNLLTRMRI\ NNVFKDFLKEFNNKTICDSSVSTHDLKVKYLATLETLTKHYGAEIFETSMLLISSENEMN\ WFHSNDGGNVLYYEVMVTGNLGIQWRHKPNVVSVEKEKNKLKRKKLENKHKKDEEKNKIR\ EEWNNFSYFPEITHIVIKESVVSINKQDNKKMELKLSSHEEALSFVSLVDGYFRLTADAH\ HYLCTDVAPPLIVHNIQNGCHGPICTEYAINKLRQEGSEEGMYVLRWSCTDFDNILMTVT\ CFEKSEQVQGAQKQFKNFQIEVQKGRYSLHGSDRSFPSLGDLMSHLKKQILRTDNISFML\ KRCCQPKPREISNLLVATKKAQEWQPVYPMSQLSFDRILKKDLVQGEHLGRGTRTHIYSG\ TLMDYKDDEGTSEEKKIKVILKVLDPSHRDISLAFFEAASMMRQVSHKHIVYLYGVCVRD\ VENIMVEEFVEGGPLDLFMHRKSDVLTTPWKFKVAKQLASALSYLEDKDLVHGNVCTKNL\ LLAREGIDSECGPFIKLSDPGIPITVLSRQECIERIPWIAPECVEDSKNLSVAADKWSFG\ TTLWEICYNGEIPLKDKTLIEKERFYESRCRPVTPSCKELADLMTRCMNYDPNQRPFFRA\ IMRDINKLEEQNPDIVSEKKPATEVDPTHFEKRFLKRIRDLGEGHFGKVELCRYDPEGDN\ TGEQVAVKSLKPESGGNHIADLKKEIEILRNLYHENIVKYKGICTEDGGNGIKLIMEFLP\ SGSLKEYLPKNKNKINLKQQLKYAVQICKGMDYLGSRQYVHRDLAARNVLVESEHQVKIG\ DFGLTKAIETDKEYYTVKDDRDSPVFWYAPECLMQSKFYIASDVWSFGVTLHELLTYCDS\ DSSPMALFLKMIGPTHGQMTVTRLVNTLKEGKRLPCPPNCPDEVYQLMRKCWEFQPSNRT\ SFQNLIEGFEALLK' pdb_chain_obj = extract_sifts_seq(sifts_filepath, 'P23458', 'JAK1_HUMAN', '4L00', 'A', seq) assert pdb_chain_obj['experimental_seq_aln_conflicts'] == '--------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ 
---------------------------------------------------gstsAQEWQPVYPMSQLSFD\ RILKKDLVQGEHLGRGTRTHIYSGTLMDYKDDEGTSEEKKIKVILKVLDPSHRDISLAFFEAASMMRQVSH\ KHIVYLYGVCVRDVENIMVEEFVEGGPLDLFMHRKSDVLTTPWKFKVAKQLASALSYLEDKDLVHGNVCTK\ NLLLAREGIDSECGPFIKLSDPGIPITVLSRQECIERIPWIAPECVEDSKNLSVAADKWSFGTTLWEICYN\ GEIPLKDKTLIEKERFYESRCRPVTPSCKELADLMTRCMNYDPNQRPFFRAIMRDINKLEEQNPDIVSEKK\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ -----------------------------------------------------------------------\ ----------' assert pdb_chain_obj['observed_seq_aln'] == '-----------------------\
def write_manual_overrides_file(self):
    """Create the manual-overrides file from the bundled template when absent.

    If ``manual_overrides_filename`` already exists nothing is done, so an
    existing file is never overwritten. Otherwise the installed
    ``resources/template-manual_overrides.yaml`` is copied to that path.
    """
    if os.path.exists(manual_overrides_filename):
        return

    template_relpath = os.path.join('resources', 'template-manual_overrides.yaml')
    template_filepath = get_installed_resource_filepath(template_relpath)
    shutil.copy(template_filepath, manual_overrides_filename)
def write_wsgi_file(self):
    """Create the WSGI file from the bundled template when absent.

    If ``wsgi_filename`` already exists nothing is done. Otherwise the
    installed ``resources/template-wsgi.py`` is copied to that path.
    """
    if os.path.exists(wsgi_filename):
        return

    template_relpath = os.path.join('resources', 'template-wsgi.py')
    template_filepath = get_installed_resource_filepath(template_relpath)
    shutil.copy(template_filepath, wsgi_filename)
def __init__(self):
    """Set the fixed ABL1 configuration attributes on the instance.

    ``structure_dirs`` is the installed ``resources/structures`` path,
    ``uniprot_query`` selects the ABL1_HUMAN mnemonic, and
    ``uniprot_domain_regex`` matches text starting with 'Protein kinase'
    that is not immediately followed by '; truncated' or '; inactive'.
    """
    structures_relpath = os.path.join('resources', 'structures')
    self.structure_dirs = get_installed_resource_filepath(structures_relpath)
    self.uniprot_query = 'mnemonic:ABL1_HUMAN'
    self.uniprot_domain_regex = '^Protein kinase(?!; truncated)(?!; inactive)'
def write_manual_overrides_file(self):
    """Copy the manual-overrides template into place unless the file exists.

    The source is the installed ``resources/template-manual_overrides.yaml``;
    the destination is ``manual_overrides_filename``. An existing
    destination is left untouched.
    """
    already_present = os.path.exists(manual_overrides_filename)
    if not already_present:
        shutil.copy(
            get_installed_resource_filepath(
                os.path.join('resources', 'template-manual_overrides.yaml')
            ),
            manual_overrides_filename,
        )
def write_wsgi_file(self):
    """Copy the WSGI template into place unless the file exists.

    The source is the installed ``resources/template-wsgi.py``; the
    destination is ``wsgi_filename``. An existing destination is left
    untouched.
    """
    already_present = os.path.exists(wsgi_filename)
    if not already_present:
        shutil.copy(
            get_installed_resource_filepath(
                os.path.join('resources', 'template-wsgi.py')
            ),
            wsgi_filename,
        )
def __init__(self):
    """Initialise the instance with the ABL1 structure path, query and domain regex."""
    resource_parts = ('resources', 'structures')
    # Path of the installed structures resource.
    self.structure_dirs = get_installed_resource_filepath(os.path.join(*resource_parts))
    # UniProt query string selecting the ABL1_HUMAN mnemonic.
    self.uniprot_query = 'mnemonic:ABL1_HUMAN'
    # 'Protein kinase' at the start, excluding the '; truncated' and
    # '; inactive' variants via negative lookaheads.
    self.uniprot_domain_regex = '^Protein kinase(?!; truncated)(?!; inactive)'