Exemple #1
0
def test_general():
    """Smoke-test the main BioMart entry points.

    Calls ``datasets`` and ``version`` with "ensembl", the
    attribute/filter/configuration lookups for one dataset, runs the
    example query stored on the instance and finally builds a query with
    the XML helper methods.  Nothing is asserted: the test passes as long
    as no call raises.
    """
    s = BioMart()
    # NOTE: s.registry() is deliberately left out, as in the previous
    # revision of this test.
    s.datasets("ensembl")
    s.version("ensembl")
    s.attributes("oanatinus_gene_ensembl")
    s.filters("oanatinus_gene_ensembl")
    s.configuration("oanatinus_gene_ensembl")

    # Run the example query that ships with the BioMart instance.  (A
    # hand-written XML string used to be assigned here but was never passed
    # to query(), so it has been dropped.)
    s.query(s._xml_example)

    # build own xml using the proper functions
    s.add_dataset_to_xml("protein")
    s.get_xml()
Exemple #2
0
def get_attributes(dataset):
    """Return the attributes of a BioMart dataset as a table.

    :param dataset: dataset name passed to ``BioMart.attributes``
    :return: ``pandas.DataFrame`` with the columns ``Attribute`` (the keys
        of the mapping returned by ``BioMart.attributes``) and
        ``Description`` (the first element of each corresponding value)
    """
    bm = BioMart(verbose=False)
    attributes = bm.attributes(dataset)
    records = [{"Attribute": name, "Description": info[0]}
               for name, info in attributes.items()]
    # Naming the columns explicitly keeps the schema stable: when the
    # mapping is empty the frame is empty but still has both columns.
    return pd.DataFrame(records, columns=["Attribute", "Description"])
Exemple #3
0
def id_map_ensembl(to_annotation, version, species, psm_protein_id):
    '''
    Query BioMart for the transcripts matching a list of identifiers.

    :param to_annotation: target identifier annotation (i.e. uniprot_swissprot)
    :param version: Database version
    :param species: Full species name
    :param psm_protein_id: list of IDs to be converted
    :return: list with one entry per line of the BioMart response. A line
        whose first tab-separated field is non-empty becomes
        [field 0, int(field 2) - int(field 1), field 3] (presumably
        transcript id, transcript_end - transcript_start and the
        annotation id, following the order the attributes are requested
        in); any other line is appended as its list of split fields.
        For "arabidopsis_thaliana" the result of id_map_ensembl_plants is
        returned unchanged.
    '''
    # If species is in plantsDB, execute plants adjusted function
    if species == "arabidopsis_thaliana":
        return id_map_ensembl_plants(to_annotation, version, species,
                                     psm_protein_id)

    # adjust UniProt xml annotation for BioMart version >87
    if int(version) > 87 and "uniprot" in to_annotation:
        to_annotation = to_annotation.replace('_', '')

    # create connection
    query_string = _id_in_xml_query_(psm_protein_id)
    host = _get_ensembl_archive_(version, species)
    dataset = _get_ensembl_dataset_(species)
    biomart = BioMart(host=host)

    # add filters
    biomart.add_dataset_to_xml(dataset)
    biomart.add_filter_to_xml(to_annotation, query_string)

    # add attributes
    biomart.add_attribute_to_xml("ensembl_transcript_id")
    biomart.add_attribute_to_xml("transcript_start")
    biomart.add_attribute_to_xml("transcript_end")
    biomart.add_attribute_to_xml(to_annotation)
    # NOTE(review): the returned attribute listing is never used; the call
    # is kept only so the sequence of BioMart requests is unchanged.
    biomart.attributes(dataset)

    # execute query
    xml_query = biomart.get_xml()
    raw_result = biomart.query(xml_query)
    if len(raw_result) == 1:
        # Single parenthesised string: valid both as a Python 2 print
        # statement and as a Python 3 print() call.
        print("ERROR: could not convert ID's trough BioMart, "
              "Please check whether Ensembl version/species were correctly supplied")

    # str.split always yields at least one element, so every response
    # (even an empty one) goes through the loop.
    result = []
    for line in raw_result.split("\n"):
        fields = line.split("\t")
        if fields[0] != "":
            result.append([fields[0],
                           int(fields[2]) - int(fields[1]),
                           fields[3]])
        else:
            result.append(fields)
    return result
def retrieve_data_from_biomart(version, species, transcript_id,
                               three_frame_translation):
    '''
    Query BioMart for transcript id, chromosome name, strand and either
    the "cdna" or the "coding" attribute of the given transcripts.

    :param version: Database version
    :param species: Full species name
    :param transcript_id: list of transcript IDs
    :param three_frame_translation: "Y" requests the "cdna" attribute,
        any other value requests "coding"
    :return: BioMart response split into a list of lines
    '''
    # resolve the filter string, host and dataset, then open the connection
    id_filter = _id_in_xml_query_(transcript_id)
    archive_host = _get_ensembl_archive_(version, species)
    dataset_name = _get_ensembl_dataset_(species)
    mart = BioMart(host=archive_host)

    # restrict the query to the requested transcripts
    mart.add_dataset_to_xml(dataset_name)
    mart.add_filter_to_xml("ensembl_transcript_id", id_filter)

    # requested attributes; the last one depends on the translation mode
    sequence_attribute = "cdna" if three_frame_translation == "Y" else "coding"
    for attribute_name in ("ensembl_transcript_id", "chromosome_name",
                           "strand", sequence_attribute):
        mart.add_attribute_to_xml(attribute_name)
    # the listing itself is not used further; the request is still issued
    mart.attributes(dataset_name)

    query_xml = mart.get_xml()

    # bypass for the plants database: swap the virtual schema name
    if species == "arabidopsis_thaliana":
        query_xml = query_xml.replace('virtualSchemaName = "default"',
                                      'virtualSchemaName = "plants_mart_30"')

    return mart.query(query_xml).split("\n")
def retrieve_data_from_biomart(version,species,transcript_id,three_frame_translation):
    '''
    Build and run a BioMart query, filtered on the given transcript IDs,
    and hand back the response line by line.

    :param version: Database version
    :param species: Full species name
    :param transcript_id: list of transcript IDs
    :param three_frame_translation: when equal to "Y" the "cdna" attribute
        is requested, otherwise "coding"
    :return: BioMart results as a list of lines
    '''
    # attributes to request, in order
    wanted = ['ensembl_transcript_id', "chromosome_name", "strand"]
    if three_frame_translation == "Y":
        wanted.append("cdna")
    else:
        wanted.append("coding")

    # set up the connection
    transcript_filter = _id_in_xml_query_(transcript_id)
    host = _get_ensembl_archive_(version, species)
    ensembl_dataset = _get_ensembl_dataset_(species)
    connection = BioMart(host=host)

    # dataset and filter
    connection.add_dataset_to_xml(ensembl_dataset)
    connection.add_filter_to_xml("ensembl_transcript_id", transcript_filter)

    # attributes
    for name in wanted:
        connection.add_attribute_to_xml(name)
    # return value is not needed below; the lookup is still performed
    connection.attributes(ensembl_dataset)

    # assemble the XML; the plants database needs its own virtual schema
    xml = connection.get_xml()
    is_plant = species == "arabidopsis_thaliana"
    if is_plant:
        xml = xml.replace('virtualSchemaName = "default"',
                          'virtualSchemaName = "plants_mart_30"')

    # run the query and split the response into lines
    response = connection.query(xml)
    lines = response.split("\n")
    return lines
def id_map_ensembl(to_annotation,version,species,psm_protein_id):
    '''
    Query BioMart, filtered on the given IDs, for the
    "ensembl_transcript_id", "transcript_length" and "uniprot_sptrembl"
    attributes.

    :param to_annotation: target identifier annotation (i.e. uniprot_swissprot),
        used as the name of the query filter
    :param version: Database version
    :param species: Full species name
    :param psm_protein_id: list of IDs to be converted
    :return: BioMart results as a list of lines; for
        "arabidopsis_thaliana" whatever id_map_ensembl_plants returns
    '''
    # species handled by the plants database take the dedicated function
    if species == "arabidopsis_thaliana":
        return id_map_ensembl_plants(to_annotation, version, species,
                                     psm_protein_id)

    # set up the connection
    id_filter = _id_in_xml_query_(psm_protein_id)
    host = _get_ensembl_archive_(version, species)
    dataset_name = _get_ensembl_dataset_(species)
    mart = BioMart(host=host)

    # dataset and filter
    mart.add_dataset_to_xml(dataset_name)
    mart.add_filter_to_xml(to_annotation, id_filter)

    # attributes, in the order they are requested
    for attribute_name in ("ensembl_transcript_id",
                           "transcript_length",
                           "uniprot_sptrembl"):
        mart.add_attribute_to_xml(attribute_name)
    # the listing is not used afterwards; the lookup is still performed
    mart.attributes(dataset_name)

    # run the query and return the response line by line
    response = mart.query(mart.get_xml())
    return response.split("\n")