Example #1
0
    def copy_source_data_files(self):
        """Install the bundled ABL1 reference data under ``external_data_dirpath``.

        The ``UniProt``, ``NCBI_Gene``, ``BindingDB`` and ``cBioPortal``
        subdirectories are created when missing. The UniProt, BindingDB and
        cBioPortal resources are decompressed to their destination names,
        while the gene2pubmed and oncotator resources are copied as-is
        (still gzipped).
        """
        def resource_filepath(filename):
            # Look up 'resources/<filename>' via get_installed_resource_filepath.
            return get_installed_resource_filepath(
                os.path.join('resources', filename))

        def ensure_subdir(dirname):
            # Create the named subdirectory if needed and return its path.
            dirpath = os.path.join(external_data_dirpath, dirname)
            if not os.path.exists(dirpath):
                os.mkdir(dirpath)
            return dirpath

        def gunzip(src_filepath, dest_filepath):
            # gzip.open() yields bytes by default, so the destination has to
            # be opened in binary mode as well: writing those bytes to a
            # text-mode ('w') file raises TypeError on Python 3. Streaming
            # with copyfileobj also avoids holding the whole file in memory.
            with gzip.open(src_filepath, 'rb') as src_file:
                with open(dest_filepath, 'wb') as dest_file:
                    shutil.copyfileobj(src_file, dest_file)

        uniprot_ref_filepath = resource_filepath('uniprot-search-abl1.xml.gz')
        uniprot_dirpath = ensure_subdir('UniProt')
        gunzip(uniprot_ref_filepath,
               os.path.join(uniprot_dirpath, 'uniprot-search.xml'))

        gene2pubmed_ref_filepath = resource_filepath('gene2pubmed-abl1.gz')
        ncbi_gene_dirpath = ensure_subdir('NCBI_Gene')
        shutil.copy(gene2pubmed_ref_filepath,
                    os.path.join(ncbi_gene_dirpath, 'gene2pubmed.gz'))

        bindingdb_ref_filepath = resource_filepath('BindingDB-abl1.tab.gz')
        bindingdb_dirpath = ensure_subdir('BindingDB')
        gunzip(bindingdb_ref_filepath,
               os.path.join(bindingdb_dirpath, 'BindingDB_All.tab'))

        cbioportal_ref_filepath = resource_filepath(
            'cbioportal-mutations-abl1.xml.gz')
        cbioportal_dirpath = ensure_subdir('cBioPortal')
        gunzip(cbioportal_ref_filepath,
               os.path.join(cbioportal_dirpath, 'cbioportal-mutations.xml'))

        # The oncotator data lives next to the cBioPortal mutations file and
        # stays compressed.
        oncotator_ref_filepath = resource_filepath('oncotator-data-abl1.json.gz')
        shutil.copy(oncotator_ref_filepath,
                    os.path.join(cbioportal_dirpath, 'oncotator-data.json.gz'))
Example #2
0
    def copy_source_data_files(self):
        """Install the bundled ABL1 reference data under ``external_data_dirpath``.

        Each bundled resource is placed in its destination subdirectory
        (created when missing). Resources flagged for decompression are
        gunzipped to the destination name; the others are copied unchanged.
        """
        # (bundled resource, destination subdirectory, destination name, gunzip?)
        # Kept in the order the files have always been installed.
        transfers = (
            ('uniprot-search-abl1.xml.gz', 'UniProt', 'uniprot-search.xml', True),
            ('gene2pubmed-abl1.gz', 'NCBI_Gene', 'gene2pubmed.gz', False),
            ('BindingDB-abl1.tab.gz', 'BindingDB', 'BindingDB_All.tab', True),
            (
                'cbioportal-mutations-abl1.xml.gz',
                'cBioPortal',
                'cbioportal-mutations.xml',
                True,
            ),
            (
                'oncotator-data-abl1.json.gz',
                'cBioPortal',
                'oncotator-data.json.gz',
                False,
            ),
        )

        for resource_name, subdir_name, dest_name, decompress in transfers:
            ref_filepath = get_installed_resource_filepath(
                os.path.join('resources', resource_name)
            )
            dest_dirpath = os.path.join(external_data_dirpath, subdir_name)
            if not os.path.exists(dest_dirpath):
                os.mkdir(dest_dirpath)
            dest_filepath = os.path.join(dest_dirpath, dest_name)

            if not decompress:
                shutil.copy(ref_filepath, dest_filepath)
                continue

            # gzip.open() returns bytes by default, so the destination must be
            # opened in binary mode; writing bytes to a text-mode ('w') file
            # raises TypeError on Python 3. copyfileobj streams the data
            # instead of loading the whole archive into memory.
            with gzip.open(ref_filepath, 'rb') as ref_file:
                with open(dest_filepath, 'wb') as dest_file:
                    shutil.copyfileobj(ref_file, dest_file)
Example #3
0
def test_gather_pdb():
    """Run GatherPDB on the bundled structures and check the first PDBChain row.

    The first ``PDBChain`` returned by the query must have a non-None
    ``observed_seq_aln_exp``.
    """
    structures_relpath = os.path.join('resources', 'structures')
    with projecttest_context(set_up_project_stage='uniprot'):
        structure_dirs = get_installed_resource_filepath(structures_relpath)
        GatherPDB(structure_dirs=structure_dirs)
        chain = models.PDBChain.query.first()
        assert chain.observed_seq_aln_exp is not None
Example #4
0
def test_gather_pdb():
    """GatherPDB must leave ``observed_seq_aln_exp`` set on the first PDBChain."""
    relative_dir = os.path.join('resources', 'structures')
    with projecttest_context(set_up_project_stage='uniprot'):
        # Point GatherPDB at the structure files shipped with the package.
        GatherPDB(structure_dirs=get_installed_resource_filepath(relative_dir))
        pdb_chain = models.PDBChain.query.first()
        assert pdb_chain.observed_seq_aln_exp is not None
def test_retrieve_uniprot_matches_reference():
    """Compare the entry name from retrieve_uniprot with the bundled reference.

    The XML returned for ``mnemonic:ABL1_HUMAN`` and the gzipped reference
    XML must agree on the text of ``entry/name``.
    """
    fetched_root = etree.fromstring(
        retrieve_uniprot('mnemonic:ABL1_HUMAN'), xml_parser)

    reference_relpath = os.path.join('resources', 'uniprot-search-abl1.xml.gz')
    reference_filepath = get_installed_resource_filepath(reference_relpath)
    with gzip.open(reference_filepath) as reference_file:
        reference_tree = etree.parse(reference_file, xml_parser)

    fetched_name = fetched_root.find('entry/name').text
    reference_name = reference_tree.find('entry/name').text
    assert fetched_name == reference_name
Example #6
0
def test_retrieve_uniprot_matches_reference():
    """The ``entry/name`` text from retrieve_uniprot must match the reference XML."""
    query_result = retrieve_uniprot('mnemonic:ABL1_HUMAN')
    retrieved_root = etree.fromstring(query_result, xml_parser)

    # Parse the gzipped reference XML that ships with the package.
    bundled_filepath = get_installed_resource_filepath(
        os.path.join('resources', 'uniprot-search-abl1.xml.gz'))
    with gzip.open(bundled_filepath) as bundled_file:
        bundled_tree = etree.parse(bundled_file, xml_parser)

    retrieved_name = retrieved_root.find('entry/name').text
    bundled_name = bundled_tree.find('entry/name').text
    assert retrieved_name == bundled_name
Example #7
0
def test_extract_sifts_seq():
    sifts_filepath = get_installed_resource_filepath(os.path.join(
        'resources', '4L00.xml.gz'
    ))

    seq = 'MQYLNIKEDCNAMAFCAKMRSSKKTEVNLEAPEPGVEVIFYLSDREPLRLGSGEYTAEEL\
CIRAAQACRISPLCHNLFALYDENTKLWYAPNRTITVDDKMSLRLHYRMRFYFTNWHGTN\
DNEQSVWRHSPKKQKNGYEKKKIPDATPLLDASSLEYLFAQGQYDLVKCLAPIRDPKTEQ\
DGHDIENECLGMAVLAISHYAMMKKMQLPELPKDISYKRYIPETLNKSIRQRNLLTRMRI\
NNVFKDFLKEFNNKTICDSSVSTHDLKVKYLATLETLTKHYGAEIFETSMLLISSENEMN\
WFHSNDGGNVLYYEVMVTGNLGIQWRHKPNVVSVEKEKNKLKRKKLENKHKKDEEKNKIR\
EEWNNFSYFPEITHIVIKESVVSINKQDNKKMELKLSSHEEALSFVSLVDGYFRLTADAH\
HYLCTDVAPPLIVHNIQNGCHGPICTEYAINKLRQEGSEEGMYVLRWSCTDFDNILMTVT\
CFEKSEQVQGAQKQFKNFQIEVQKGRYSLHGSDRSFPSLGDLMSHLKKQILRTDNISFML\
KRCCQPKPREISNLLVATKKAQEWQPVYPMSQLSFDRILKKDLVQGEHLGRGTRTHIYSG\
TLMDYKDDEGTSEEKKIKVILKVLDPSHRDISLAFFEAASMMRQVSHKHIVYLYGVCVRD\
VENIMVEEFVEGGPLDLFMHRKSDVLTTPWKFKVAKQLASALSYLEDKDLVHGNVCTKNL\
LLAREGIDSECGPFIKLSDPGIPITVLSRQECIERIPWIAPECVEDSKNLSVAADKWSFG\
TTLWEICYNGEIPLKDKTLIEKERFYESRCRPVTPSCKELADLMTRCMNYDPNQRPFFRA\
IMRDINKLEEQNPDIVSEKKPATEVDPTHFEKRFLKRIRDLGEGHFGKVELCRYDPEGDN\
TGEQVAVKSLKPESGGNHIADLKKEIEILRNLYHENIVKYKGICTEDGGNGIKLIMEFLP\
SGSLKEYLPKNKNKINLKQQLKYAVQICKGMDYLGSRQYVHRDLAARNVLVESEHQVKIG\
DFGLTKAIETDKEYYTVKDDRDSPVFWYAPECLMQSKFYIASDVWSFGVTLHELLTYCDS\
DSSPMALFLKMIGPTHGQMTVTRLVNTLKEGKRLPCPPNCPDEVYQLMRKCWEFQPSNRT\
SFQNLIEGFEALLK'

    pdb_chain_obj = extract_sifts_seq(sifts_filepath, 'P23458', 'JAK1_HUMAN', '4L00', 'A', seq)

    assert pdb_chain_obj['experimental_seq_aln_conflicts'] == '--------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
---------------------------------------------------gstsAQEWQPVYPMSQLSFD\
RILKKDLVQGEHLGRGTRTHIYSGTLMDYKDDEGTSEEKKIKVILKVLDPSHRDISLAFFEAASMMRQVSH\
KHIVYLYGVCVRDVENIMVEEFVEGGPLDLFMHRKSDVLTTPWKFKVAKQLASALSYLEDKDLVHGNVCTK\
NLLLAREGIDSECGPFIKLSDPGIPITVLSRQECIERIPWIAPECVEDSKNLSVAADKWSFGTTLWEICYN\
GEIPLKDKTLIEKERFYESRCRPVTPSCKELADLMTRCMNYDPNQRPFFRAIMRDINKLEEQNPDIVSEKK\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
----------'

    assert pdb_chain_obj['observed_seq_aln'] == '-----------------------\
Example #8
0
def test_extract_sifts_seq():
    sifts_filepath = get_installed_resource_filepath(
        os.path.join('resources', '4L00.xml.gz'))

    seq = 'MQYLNIKEDCNAMAFCAKMRSSKKTEVNLEAPEPGVEVIFYLSDREPLRLGSGEYTAEEL\
CIRAAQACRISPLCHNLFALYDENTKLWYAPNRTITVDDKMSLRLHYRMRFYFTNWHGTN\
DNEQSVWRHSPKKQKNGYEKKKIPDATPLLDASSLEYLFAQGQYDLVKCLAPIRDPKTEQ\
DGHDIENECLGMAVLAISHYAMMKKMQLPELPKDISYKRYIPETLNKSIRQRNLLTRMRI\
NNVFKDFLKEFNNKTICDSSVSTHDLKVKYLATLETLTKHYGAEIFETSMLLISSENEMN\
WFHSNDGGNVLYYEVMVTGNLGIQWRHKPNVVSVEKEKNKLKRKKLENKHKKDEEKNKIR\
EEWNNFSYFPEITHIVIKESVVSINKQDNKKMELKLSSHEEALSFVSLVDGYFRLTADAH\
HYLCTDVAPPLIVHNIQNGCHGPICTEYAINKLRQEGSEEGMYVLRWSCTDFDNILMTVT\
CFEKSEQVQGAQKQFKNFQIEVQKGRYSLHGSDRSFPSLGDLMSHLKKQILRTDNISFML\
KRCCQPKPREISNLLVATKKAQEWQPVYPMSQLSFDRILKKDLVQGEHLGRGTRTHIYSG\
TLMDYKDDEGTSEEKKIKVILKVLDPSHRDISLAFFEAASMMRQVSHKHIVYLYGVCVRD\
VENIMVEEFVEGGPLDLFMHRKSDVLTTPWKFKVAKQLASALSYLEDKDLVHGNVCTKNL\
LLAREGIDSECGPFIKLSDPGIPITVLSRQECIERIPWIAPECVEDSKNLSVAADKWSFG\
TTLWEICYNGEIPLKDKTLIEKERFYESRCRPVTPSCKELADLMTRCMNYDPNQRPFFRA\
IMRDINKLEEQNPDIVSEKKPATEVDPTHFEKRFLKRIRDLGEGHFGKVELCRYDPEGDN\
TGEQVAVKSLKPESGGNHIADLKKEIEILRNLYHENIVKYKGICTEDGGNGIKLIMEFLP\
SGSLKEYLPKNKNKINLKQQLKYAVQICKGMDYLGSRQYVHRDLAARNVLVESEHQVKIG\
DFGLTKAIETDKEYYTVKDDRDSPVFWYAPECLMQSKFYIASDVWSFGVTLHELLTYCDS\
DSSPMALFLKMIGPTHGQMTVTRLVNTLKEGKRLPCPPNCPDEVYQLMRKCWEFQPSNRT\
SFQNLIEGFEALLK'

    pdb_chain_obj = extract_sifts_seq(sifts_filepath, 'P23458', 'JAK1_HUMAN',
                                      '4L00', 'A', seq)

    assert pdb_chain_obj['experimental_seq_aln_conflicts'] == '--------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
---------------------------------------------------gstsAQEWQPVYPMSQLSFD\
RILKKDLVQGEHLGRGTRTHIYSGTLMDYKDDEGTSEEKKIKVILKVLDPSHRDISLAFFEAASMMRQVSH\
KHIVYLYGVCVRDVENIMVEEFVEGGPLDLFMHRKSDVLTTPWKFKVAKQLASALSYLEDKDLVHGNVCTK\
NLLLAREGIDSECGPFIKLSDPGIPITVLSRQECIERIPWIAPECVEDSKNLSVAADKWSFGTTLWEICYN\
GEIPLKDKTLIEKERFYESRCRPVTPSCKELADLMTRCMNYDPNQRPFFRAIMRDINKLEEQNPDIVSEKK\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
-----------------------------------------------------------------------\
----------'

    assert pdb_chain_obj['observed_seq_aln'] == '-----------------------\
Example #9
0
 def write_manual_overrides_file(self):
     """Copy the template manual-overrides YAML into place unless it already exists."""
     if os.path.exists(manual_overrides_filename):
         # An existing file is left untouched.
         return
     template_relpath = os.path.join(
         'resources', 'template-manual_overrides.yaml')
     source_filepath = get_installed_resource_filepath(template_relpath)
     shutil.copy(source_filepath, manual_overrides_filename)
Example #10
0
 def write_wsgi_file(self):
     """Copy the template WSGI script to ``wsgi_filename`` unless it already exists."""
     if os.path.exists(wsgi_filename):
         # An existing file is left untouched.
         return
     template_relpath = os.path.join('resources', 'template-wsgi.py')
     source_filepath = get_installed_resource_filepath(template_relpath)
     shutil.copy(source_filepath, wsgi_filename)
Example #11
0
 def __init__(self):
     """Set the structure directory, UniProt query and domain regex attributes."""
     structures_relpath = os.path.join('resources', 'structures')
     # Path to the bundled structure files, as resolved by
     # get_installed_resource_filepath.
     self.structure_dirs = get_installed_resource_filepath(structures_relpath)
     self.uniprot_query = 'mnemonic:ABL1_HUMAN'
     # Matches strings starting with "Protein kinase" unless immediately
     # followed by "; truncated" or "; inactive".
     self.uniprot_domain_regex = '^Protein kinase(?!; truncated)(?!; inactive)'
Example #12
0
 def write_manual_overrides_file(self):
     """Create ``manual_overrides_filename`` from the bundled template when absent."""
     already_present = os.path.exists(manual_overrides_filename)
     if already_present:
         return  # never overwrite an existing file
     template_filepath = get_installed_resource_filepath(
         os.path.join('resources', 'template-manual_overrides.yaml')
     )
     shutil.copy(template_filepath, manual_overrides_filename)
Example #13
0
 def write_wsgi_file(self):
     """Create ``wsgi_filename`` from the bundled WSGI template when absent."""
     already_present = os.path.exists(wsgi_filename)
     if already_present:
         return  # never overwrite an existing file
     template_filepath = get_installed_resource_filepath(
         os.path.join('resources', 'template-wsgi.py')
     )
     shutil.copy(template_filepath, wsgi_filename)
Example #14
0
 def __init__(self):
     """Initialise the structure directory, UniProt query and domain regex."""
     bundled_structures = get_installed_resource_filepath(
         os.path.join('resources', 'structures')
     )
     self.structure_dirs = bundled_structures
     self.uniprot_query = 'mnemonic:ABL1_HUMAN'
     # Accepts "Protein kinase..." but rejects the "; truncated" and
     # "; inactive" variants via negative lookaheads.
     self.uniprot_domain_regex = '^Protein kinase(?!; truncated)(?!; inactive)'