def connect_plugins(debug):
    """
    Search for all modules in the plugin package (directory), import each
    and run its plugin_connect method.

    Args:
        debug: When true, re-raise plugin import errors instead of
            reporting them to stderr and skipping the module.
    """
    # Initialize File Store
    FileStore.init("pp-", "~/.paladin-plugins", ".", 30)

    # Add internal core plugins
    internal_plugins["flush"] = render_output
    internal_plugins["write"] = render_output

    # Import all external plugin modules in package (using full path)
    for importer, module, package in pkgutil.iter_modules(plugins.__path__):
        try:
            module_handle = importlib.import_module(
                "{0}.{1}".format(plugins.__name__, module))
            if hasattr(module_handle, "plugin_connect"):
                plugin_modules[module] = PluginDef(module_handle)
        except Exception:
            if debug:
                # Bare raise preserves the original traceback exactly
                raise
            send_output(
                "Error loading \"{0}.py\", skipping...".format(module),
                "stderr")

    # Connect to all external plugins
    for definition in plugin_modules.values():
        definition.module.plugin_connect(definition)
def pathways_init():
    """Register the pathways cache database, its tables, and its queries."""
    # Setup FileStore
    FileStore("pathways-db", "pathways-db", "pathways.db", None,
              FileStore.FTYPE_CACHE, FileStore.FOPT_NORMAL)

    # Setup DataStore
    DataStore("pathways", FileStore.get_entry("pathways-db").path)
    store = DataStore.get_entry("pathways")
    store.create_table(
        "enzyme",
        [("ec", "text", "PRIMARY KEY"), ("pathway", "text", "")])
    store.create_table(
        "pathway",
        [("pathway", "text", "PRIMARY KEY"), ("info", "text", "")])
    store.define_query(
        "enzyme-lookup", "SELECT pathway FROM enzyme WHERE ec = ?")
    store.define_query(
        "pathway-lookup", "SELECT info FROM pathway WHERE pathway = ?")

    # Check for expired database; clear stale rows and reset the age clock
    if store.get_expired("enzyme", 30):
        store.delete_rows("enzyme")
        store.delete_rows("pathway")
        store.update_age("enzyme")
def populate_database():
    """
    Populate sequence header indices.

    Builds the "indices" table mapping each UniProt accession to the
    source file and stream position of its fasta header, so sequences
    can later be retrieved by seeking (see get_sequence).
    """
    # Skip the expensive rebuild while the cached table is still fresh
    if not DataStore.get_entry("decluster").get_expired("indices", 30):
        return
    core.main.send_output("Populating UniProt sequences...", "stderr")

    # Start transaction and empty any existing data
    DataStore.get_entry("decluster").process_trans()
    DataStore.get_entry("decluster").delete_rows("indices")

    # Download each sequence file
    for entry in FileStore.get_group("decluster-seqs"):
        entry.prepare()
        with entry.get_handle("rt") as handle:
            acc = ""
            # readline() is used instead of `for line in handle` because
            # iterating a file disables tell(), which is needed below.
            while True:
                line = handle.readline()
                if not line:
                    break
                if line.startswith(">"):
                    fields = line.rstrip().split()
                    # Assumes headers of the form ">db|ACC|id ..." — the
                    # accession is the second pipe-delimited field.
                    # TODO(review): confirm against the source fasta files.
                    acc = fields[0].split("|")[1]
                    # NOTE(review): tell() - len(line) treats text-mode
                    # positions as one unit per character; verify this
                    # matches how get_sequence later seeks, particularly
                    # for gzip-decompressed streams.
                    DataStore.get_entry("decluster").insert_rows(
                        "indices",
                        [(acc, entry.fid, handle.tell() - len(line))])

    # Finalize transaction and current table age
    DataStore.get_entry("decluster").process_trans()
    DataStore.get_entry("decluster").update_age("indices")
def populate_database():
    """Generate (if necessary) the taxonomic lineage lookup table."""
    store = DataStore.get_entry("taxonomy")
    # Nothing to do while the cached lineage table is still current
    if not store.get_expired("lineage", 30):
        return
    core.main.send_output("Populating taxonomic lineage data...", "stderr")

    # Download tab delimited data
    entry = FileStore.get_entry("taxonomy-lineage")
    entry.prepare()

    # Start transaction and empty any existing data
    store.process_trans()
    store.delete_rows("lineage")

    # Iterate through downloaded table and add rows
    with entry.get_handle("r") as handle:
        for row in handle:
            columns = row.rstrip().split("\t")
            # Require a non-empty mnemonic (col 1) and a lineage (col 8)
            if len(columns) >= 9 and columns[1]:
                store.insert_rows("lineage", [(columns[1], columns[8])])

    # Finalize transaction and current table age
    store.process_trans()
    store.update_age("lineage")
def populate_database():
    """Generate the UniProt cross-reference table."""
    store = DataStore.get_entry("crossref")
    # Rebuild only once the cached table has expired
    if not store.get_expired("uniprot", 30):
        return
    core.main.send_output("Populating UniProt database cross-references...",
                          "stderr")

    # Download tab delimited data
    entry = FileStore.get_entry("crossref-uniprot")
    entry.prepare()

    # Drop indices ahead of the bulk insert, then start the transaction
    # and empty any existing data
    index_names = ("uniprot_acc", "uniprot_acc_db", "uniprot_db_cross")
    for index_name in index_names:
        store.drop_index(index_name)
    store.process_trans()
    store.delete_rows("uniprot")

    # Iterate through downloaded table and add rows
    with entry.get_handle("rt") as handle:
        for row in handle:
            columns = row.rstrip().split("\t")
            if len(columns) >= 3:
                store.insert_rows("uniprot", [columns])

    # Finalize transaction, rebuild indices, and refresh table age
    store.process_trans()
    for index_name in index_names:
        store.create_index(index_name)
    store.update_age("uniprot")
def taxonomy_init():
    """Register taxonomy cache files, database table, and lookup query."""
    # Setup FileStore
    FileStore("taxonomy-db", "taxonomy-db", "taxonomy.db", None,
              FileStore.FTYPE_CACHE, FileStore.FOPT_NORMAL)
    FileStore("taxonomy-lineage", "taxonomy-lineage", "taxonomy-lineage.dat",
              "http://www.uniprot.org/taxonomy/?query=&sort=score&format=tab",
              FileStore.FTYPE_TEMP, FileStore.FOPT_NORMAL)

    # Setup DataStore
    DataStore("taxonomy", FileStore.get_entry("taxonomy-db").path)
    store = DataStore.get_entry("taxonomy")
    store.create_table(
        "lineage",
        [("mnemonic", "text", "PRIMARY KEY"), ("lineage", "text", "")])
    store.define_query(
        "lineage-lookup", "SELECT lineage FROM lineage WHERE mnemonic = ?")

    # Populate database
    populate_database()
def render_sequences(cluster_ids):
    """
    Retrieve all members of requested UniRef90 clusters and render
    fasta data to stdout.

    Args:
        cluster_ids: Iterable of UniRef90 cluster identifiers.
    """
    # Prepare all sequence files for reading
    for entry in FileStore.get_group("decluster-seqs"):
        entry.get_handle("rt")

    for cluster_id in cluster_ids:
        # fetchall() replaces fetchmany(): with no size argument,
        # fetchmany() returns only cursor.arraysize rows (1 by default
        # for sqlite3), silently dropping most cluster members.
        for result in DataStore.get_entry("crossref").exec_query(
                "uniprot_cross_acc", ("UniRef90", cluster_id)).fetchall():
            core.main.send_output(get_sequence(result[0]), "stdout", "")
def decluster_init():
    """Register decluster cache files, sequence sources, and index table."""
    # Setup FileStore
    FileStore("decluster-db", "decluster-db", "decluster.db", None,
              FileStore.FTYPE_CACHE, FileStore.FOPT_NORMAL)

    # Both UniProt sequence sources share the "decluster-seqs" group
    sequence_sources = (
        ("decluster-swissprot", "decluster_uniprot_sprot.fasta.gz",
         "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz"),
        ("decluster-trembl", "decluster_uniprot_trembl.fasta.gz",
         "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz"),
    )
    for fid, local_name, url in sequence_sources:
        FileStore("decluster-seqs", fid, local_name, url,
                  FileStore.FTYPE_CACHE, FileStore.FOPT_GZIP_DECOMPRESS)

    # Setup DataStore
    DataStore("decluster", FileStore.get_entry("decluster-db").path)
    store = DataStore.get_entry("decluster")
    store.create_table(
        "indices",
        [("id", "text", "PRIMARY KEY"), ("file", "text", ""),
         ("pos", "integer", "")])
    store.define_query(
        "index-lookup", "SELECT file, pos FROM indices WHERE id = ?")

    populate_database()
def crossref_init():
    """Register cross-reference cache files, table, queries, and indices."""
    # Setup FileStore
    FileStore("crossref-db", "crossref-db", "crossref.db", None,
              FileStore.FTYPE_CACHE, FileStore.FOPT_NORMAL)
    FileStore(
        "crossref-uniprot", "crossref-uniprot", "idmapping.dat.gz",
        "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz",
        FileStore.FTYPE_TEMP, FileStore.FOPT_GZIP)

    # Setup DataStore
    # Use the .path attribute for consistency with the other *_init
    # routines (pathways_init, taxonomy_init, decluster_init)
    DataStore("crossref", FileStore.get_entry("crossref-db").path)
    store = DataStore.get_entry("crossref")
    store.create_table(
        "uniprot",
        [("acc", "text", ""), ("db", "text", ""), ("cross", "text", "")])
    store.define_query(
        "uniprot_acc_cross",
        "SELECT cross FROM uniprot WHERE acc = ? AND db = ?")
    store.define_query(
        "uniprot_acc_all", "SELECT db, cross FROM uniprot WHERE acc = ?")
    store.define_query(
        "uniprot_cross_acc",
        "SELECT acc FROM uniprot WHERE db = ? AND cross = ?")
    # Join explicitly on the shared accession column: the original
    # "JOIN ... ON acc" is an ambiguous column reference (acc exists in
    # both t1 and t2) and is rejected by SQLite at prepare time.
    store.define_query(
        "uniprot_cross_cross",
        "SELECT t2.cross FROM uniprot AS t1 JOIN uniprot AS t2 "
        "ON t1.acc = t2.acc WHERE t1.db = ? AND t1.cross = ? AND t2.db = ?")
    store.define_index("uniprot_acc", "uniprot", ["acc"], False)
    store.define_index("uniprot_acc_db", "uniprot", ["acc", "db"], False)
    store.define_index("uniprot_db_cross", "uniprot", ["db", "cross"], False)

    # Populate database
    populate_database()
def get_sequence(acc):
    """
    Lookup the fasta record (header plus sequence lines) for the given
    UniProt accession; exits the program if the accession is unknown.
    """
    hit = DataStore.get_entry("decluster").exec_query(
        "index-lookup", [acc]).fetchone()
    if not hit:
        # NOTE(review): unlike other call sites, no output stream is
        # passed to send_output here — confirm its default is intended.
        core.main.send_output(
            "Sequence not found for UniProt accession '{0}'".format(acc))
        sys.exit(1)

    # Seek the index position in the appropriate file
    file_id, position = hit
    handle = FileStore.get_entry(file_id, "decluster-seqs").get_handle()
    handle.seek(position)

    # Collect lines (the record's own header first) until the next header
    collected = []
    for line in handle:
        if line.startswith(">") and collected:
            break
        collected.append(line)
    return "".join(collected)
# Parse arguments
arguments = parse_arguments()

# Connect to plugins
core.main.connect_plugins(arguments.debug)

# Handle non-pipeline actions
pipeline_present = True
if arguments.list:
    pipeline_present = False
    list_plugins()

if pipeline_present:
    pipeline = get_pipeline(arguments.plugins)
    try:
        # NOTE: explicit plugin initialization via init_plugins is
        # currently disabled; the pipeline is executed directly.
        core.main.exec_pipeline(pipeline)
        # Do a final flush to standard out
        core.main.exec_pipeline([("flush", "")])
    finally:
        # Clean up FileStore
        FileStore.destroy()