gobp_dict, name='go-biological-process', prefix='gobp', domain=['process']) gocc_data = GOData( gocc_dict, name='go-cellular-component', prefix='gocc', domain=[ 'complex', 'location']) baseline_data['go.xml.gz'] = ( go_file, parsers.GOParser, [ gobp_data, gocc_data]) mesh_file = get_latest_MeSH_filename( 'ftp://nlmpubs.nlm.nih.gov/online/mesh/.asciimesh/', 'd', '.bin') meshcl_dict, meshd_dict, meshpp_dict, meshc_dict, mesha_dict = {}, {}, {}, {}, {} meshcl_data = MESHData( meshcl_dict, name='mesh-cellular-structures', prefix='meshcs', domain=['location'], scheme_type=[ 'ns', 'anno']) meshd_data = MESHData( meshd_dict, name='mesh-diseases', prefix='meshd', domain=['disease'], scheme_type=[
MNs = set() os.chdir(os.pardir) return MESH_dict # data is dictionary key = name, value = list of tuples with source url and id owl_data = { 'anatomy': [('http://purl.obolibrary.org/obo/uberon.owl', 'UBERON')], 'disease': [('http://purl.obolibrary.org/obo/doid.owl', 'DOID')], 'cell': [('http://purl.obolibrary.org/obo/cl.owl', 'CL')], 'cell-line': [('http://purl.obolibrary.org/obo/clo.owl', 'CLO'), ('http://www.ebi.ac.uk/efo/efo.owl', 'EFO')] } # MeSH file is updated yearly - get latest mesh_url = get_latest_MeSH_filename( 'ftp://nlmpubs.nlm.nih.gov/online/mesh/.asciimesh/', 'd', '.bin') # parse version from MeSH filename mesh_ver = mesh_url.split('/')[-1].lstrip('d').rstrip('.bin') # names of .belanno files from MeSH mesh_anno_names = ['cell-structure', 'mesh-diseases', 'mesh-anatomy'] # MeSH sub-branches to imclude in mesh-anatomy anatomy_branches = ('A01', 'A02', 'A03', 'A04', 'A05', 'A06', 'A07', 'A08', 'A09', 'A10', 'A11', 'A12', 'A14', 'A15', 'A16', 'A17') disease_branches = ('C', 'F') # command line argument parser = argparse.ArgumentParser( description="""Generate BEL annotation files. """)
# Sources used to build the change log.  Each key (a file name) maps to a
# (source URL, parser) tuple; the OrderedDict keeps the entries in the order
# they are added below.
changelog_data = OrderedDict()

changelog_data['gene_history.gz'] = (
    'ftp://ftp.ncbi.nih.gov/gene/DATA/gene_history.gz',
    parsers.EntrezGeneHistoryParser,
)

# The HGNC download URL is one long query string; it is split per query
# parameter here purely for readability (adjacent literals are concatenated).
changelog_data['hgnc.tsv'] = (
    'http://www.genenames.org/cgi-bin/hgnc_downloads'
    '?title=HGNC+output+data'
    '&hgnc_dbtag=on'
    '&preset=all'
    '&status=Approved'
    '&status=Entry+Withdrawn'
    '&status_opt=2'
    '&level=pri'
    '&=on'
    '&where='
    '&order_by=gd_app_sym_sort'
    '&limit='
    '&format=text'
    '&submit=submit'
    '&.cgifields='
    '&.cgifields=level'
    '&.cgifields=chr'
    '&.cgifields=status'
    '&.cgifields=hgnc_dbtag',
    parsers.HGNCParser,
)

changelog_data['MRK_List1.rpt'] = (
    'ftp://ftp.informatics.jax.org/pub/reports/MRK_List1.rpt',
    parsers.MGIParser,
)

changelog_data['rgd.txt'] = (
    'ftp://rgd.mcw.edu/pub/data_release/GENES_RAT.txt',
    parsers.RGDParser,
)

changelog_data['rgd_obsolete.txt'] = (
    'ftp://rgd.mcw.edu/pub/data_release/GENES_OBSOLETE_IDS.txt',
    parsers.RGDObsoleteParser,
)

changelog_data['delac_sp.txt'] = (
    'ftp://ftp.uniprot.org/pub/databases/uniprot/knowledgebase/docs/delac_sp.txt',
    parsers.SwissWithdrawnParser,
)

# get the latest GO archive file name and URL; the same archive feeds both
# GO entries.  (These entries were previously pinned to
# go_201309-termdb.obo-xml.gz under latest-full.)
go_file = get_latest_GO_filename('http://archive.geneontology.org/latest-full')
changelog_data['gobp.xml.gz'] = (go_file, parsers.GOBPParser)
changelog_data['gocc.xml.gz'] = (go_file, parsers.GOCCParser)

changelog_data['chebi.owl'] = (
    'ftp://ftp.ebi.ac.uk/pub/databases/chebi/ontology/chebi.owl',
    parsers.CHEBIParser,
)

# The MeSH replacement file is looked up at run time, but is stored under
# the fixed key 'replace2013.txt'.
mesh_replace_file = get_latest_MeSH_filename(
    'ftp://nlmpubs.nlm.nih.gov/online/mesh/.newterms/', 'replace', '.txt')
changelog_data['replace2013.txt'] = (
    mesh_replace_file,
    parsers.MESHChangesParser,
)

changelog_data['doid.owl'] = (
    'http://purl.obolibrary.org/obo/doid.owl',
    parsers.DODeprecatedParser,
)

# vim: ts=4 sts=4 sw=4 noexpandtab