Example #1
def connect_plugins(debug):
    """ Search for all modules in the plugin package (directory), import each and run plugin_connect method """
    # Initialize File Store
    FileStore.init("pp-", "~/.paladin-plugins", ".", 30)

    # Add internal core plugins
    internal_plugins["flush"] = render_output
    internal_plugins["write"] = render_output

    # Import all external plugin modules in package (using full path)
    for importer, module, package in pkgutil.iter_modules(plugins.__path__):
        try:
            module_handle = importlib.import_module("{0}.{1}".format(
                plugins.__name__, module))
            if "plugin_connect" in dir(module_handle):
                plugin_modules[module] = PluginDef(module_handle)

        except Exception as exception:
            if debug:
                raise exception
            else:
                send_output(
                    "Error loading \"{0}.py\", skipping...".format(module),
                    "stderr")

    # Connect to all external plugins
    for plugin in plugin_modules:
        plugin_modules[plugin].module.plugin_connect(plugin_modules[plugin])
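
The loader above only requires that each module in the plugins package define a plugin_connect function accepting the PluginDef built for it. A minimal sketch of such a module follows; the attributes assigned to the definition are illustrative assumptions, not part of the documented PluginDef API.

# hypothetical_plugin.py - a module discoverable by connect_plugins()
def plugin_connect(definition):
    # connect_plugins() passes in the PluginDef it created for this module.
    # The attribute names below are assumptions used only for illustration.
    definition.name = "hypothetical_plugin"
    definition.description = "Example plugin that does nothing"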
Example #2
def pathways_init():
    # Setup FileStore
    FileStore("pathways-db", "pathways-db", "pathways.db", None, FileStore.FTYPE_CACHE, FileStore.FOPT_NORMAL)

    # Setup DataStore
    DataStore("pathways", FileStore.get_entry("pathways-db").path)
    DataStore.get_entry("pathways").create_table("enzyme", [("ec", "text", "PRIMARY KEY"), ("pathway", "text", "")])
    DataStore.get_entry("pathways").create_table("pathway", [("pathway", "text", "PRIMARY KEY"), ("info", "text", "")])
    DataStore.get_entry("pathways").define_query("enzyme-lookup", "SELECT pathway FROM enzyme WHERE ec = ?")
    DataStore.get_entry("pathways").define_query("pathway-lookup", "SELECT info FROM pathway WHERE pathway = ?")

    # Check for expired database
    if DataStore.get_entry("pathways").get_expired("enzyme", 30):
        DataStore.get_entry("pathways").delete_rows("enzyme")
        DataStore.get_entry("pathways").delete_rows("pathway")
        DataStore.get_entry("pathways").update_age("enzyme")
Example #3
def populate_database():
    """ Populate sequence header indices """
    if not DataStore.get_entry("decluster").get_expired("indices", 30):
        return

    core.main.send_output("Populating UniProt sequences...", "stderr")

    # Start transaction and empty any existing data
    DataStore.get_entry("decluster").process_trans()
    DataStore.get_entry("decluster").delete_rows("indices")

    # Download each sequence file
    for entry in FileStore.get_group("decluster-seqs"):
        entry.prepare()

        with entry.get_handle("rt") as handle:
            acc = ""

            while True:
                line = handle.readline()
                if not line:
                    break

                if line.startswith(">"):
                    fields = line.rstrip().split()
                    acc = fields[0].split("|")[1]
                    DataStore.get_entry("decluster").insert_rows(
                        "indices",
                        [(acc, entry.fid, handle.tell() - len(line))])

    # Finalize transaction and current table age
    DataStore.get_entry("decluster").process_trans()
    DataStore.get_entry("decluster").update_age("indices")
Example #4
def populate_database():
    """ Generate (if necessary) and get lineage lookup """
    if not DataStore.get_entry("taxonomy").get_expired("lineage", 30):
        return

    core.main.send_output("Populating taxonomic lineage data...", "stderr")

    # Download tab delimited data
    entry = FileStore.get_entry("taxonomy-lineage")
    entry.prepare()

    # Start transaction and empty any existing data
    DataStore.get_entry("taxonomy").process_trans()
    DataStore.get_entry("taxonomy").delete_rows("lineage")

    # Iterate through downloaded table and add rows
    with entry.get_handle("r") as handle:
        for line in handle:
            fields = line.rstrip().split("\t")
            if len(fields) < 9 or fields[1] == "":
                continue

            # Add to database
            DataStore.get_entry("taxonomy").insert_rows(
                "lineage", [(fields[1], fields[8])])

    # Finalize transaction and current table age
    DataStore.get_entry("taxonomy").process_trans()
    DataStore.get_entry("taxonomy").update_age("lineage")
Example #5
def populate_database():
    """ Generate cross-reference database """
    if not DataStore.get_entry("crossref").get_expired("uniprot", 30):
        return

    core.main.send_output("Populating UniProt database cross-references...",
                          "stderr")

    # Download tab delimited data
    entry = FileStore.get_entry("crossref-uniprot")
    entry.prepare()

    # Start transaction and empty any existing data
    DataStore.get_entry("crossref").drop_index("uniprot_acc")
    DataStore.get_entry("crossref").drop_index("uniprot_acc_db")
    DataStore.get_entry("crossref").drop_index("uniprot_db_cross")
    DataStore.get_entry("crossref").process_trans()
    DataStore.get_entry("crossref").delete_rows("uniprot")

    # Iterate through downloaded table and add rows
    with entry.get_handle("rt") as handle:
        for line in handle:
            fields = line.rstrip().split("\t")
            if len(fields) < 3:
                continue

            # Add to database
            DataStore.get_entry("crossref").insert_rows("uniprot", [fields])

    # Finalize transaction and current table age
    DataStore.get_entry("crossref").process_trans()
    DataStore.get_entry("crossref").create_index("uniprot_acc")
    DataStore.get_entry("crossref").create_index("uniprot_acc_db")
    DataStore.get_entry("crossref").create_index("uniprot_db_cross")
    DataStore.get_entry("crossref").update_age("uniprot")
Example #6
def taxonomy_init():
    # Setup FileStore
    FileStore("taxonomy-db", "taxonomy-db", "taxonomy.db", None,
              FileStore.FTYPE_CACHE, FileStore.FOPT_NORMAL)
    FileStore("taxonomy-lineage", "taxonomy-lineage", "taxonomy-lineage.dat",
              "http://www.uniprot.org/taxonomy/?query=&sort=score&format=tab",
              FileStore.FTYPE_TEMP, FileStore.FOPT_NORMAL)

    # Setup DataStore
    DataStore("taxonomy", FileStore.get_entry("taxonomy-db").path)
    DataStore.get_entry("taxonomy").create_table(
        "lineage", [("mnemonic", "text", "PRIMARY KEY"),
                    ("lineage", "text", "")])
    DataStore.get_entry("taxonomy").define_query(
        "lineage-lookup", "SELECT lineage FROM lineage WHERE mnemonic = ?")

    # Populate database
    populate_database()
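
Once initialized, the "lineage-lookup" query can be used with the same exec_query/fetchone pattern shown in get_sequence. A minimal sketch; the helper name and mnemonic value are hypothetical.

def get_lineage(mnemonic):
    # Return the lineage string for an organism mnemonic (e.g. "ECOLI"),
    # or None if the mnemonic is not present in the lineage table.
    result = DataStore.get_entry("taxonomy").exec_query("lineage-lookup", (mnemonic,)).fetchone()
    return result[0] if result else None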
Example #7
def render_sequences(cluster_ids):
    """ Retrieve all members of requested UniRef90 clusters and render fasta data """
    # Prepare all sequence files for reading
    for entry in FileStore.get_group("decluster-seqs"):
        entry.get_handle("rt")

    for cluster_id in cluster_ids:
        for result in DataStore.get_entry("crossref").exec_query(
                "uniprot_cross_acc", ("UniRef90", cluster_id)).fetchmany():
            core.main.send_output(get_sequence(result[0]), "stdout", "")
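
Hypothetical usage, assuming cluster members are recorded in the cross column under full UniRef90 identifiers (an assumption based on the "UniRef90" database key used above); the ID below is a placeholder.

# Print every member sequence of one placeholder UniRef90 cluster to stdout
render_sequences(["UniRef90_P12345"])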
Example #8
def decluster_init():
    # Setup FileStore
    FileStore("decluster-db", "decluster-db", "decluster.db", None,
              FileStore.FTYPE_CACHE, FileStore.FOPT_NORMAL)
    FileStore(
        "decluster-seqs", "decluster-swissprot",
        "decluster_uniprot_sprot.fasta.gz",
        "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz",
        FileStore.FTYPE_CACHE, FileStore.FOPT_GZIP_DECOMPRESS)
    FileStore(
        "decluster-seqs", "decluster-trembl",
        "decluster_uniprot_trembl.fasta.gz",
        "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz",
        FileStore.FTYPE_CACHE, FileStore.FOPT_GZIP_DECOMPRESS)

    # Setup DataStore
    DataStore("decluster", FileStore.get_entry("decluster-db").path)
    DataStore.get_entry("decluster").create_table(
        "indices", [("id", "text", "PRIMARY KEY"), ("file", "text", ""),
                    ("pos", "integer", "")])
    DataStore.get_entry("decluster").define_query(
        "index-lookup", "SELECT file, pos FROM indices WHERE id = ?")

    populate_database()
Example #9
def crossref_init():
    # Setup FileStore
    FileStore("crossref-db", "crossref-db", "crossref.db", None,
              FileStore.FTYPE_CACHE, FileStore.FOPT_NORMAL)
    FileStore(
        "crossref-uniprot", "crossref-uniprot", "idmapping.dat.gz",
        "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz",
        FileStore.FTYPE_TEMP, FileStore.FOPT_GZIP)

    # Setup DataStore
    DataStore("crossref", FileStore.get_entry("crossref-db").get_path())
    DataStore.get_entry("crossref").create_table("uniprot",
                                                 [("acc", "text", ""),
                                                  ("db", "text", ""),
                                                  ("cross", "text", "")])
    DataStore.get_entry("crossref").define_query(
        "uniprot_acc_cross",
        "SELECT cross FROM uniprot WHERE acc = ? AND db = ?")
    DataStore.get_entry("crossref").define_query(
        "uniprot_acc_all", "SELECT db, cross FROM uniprot WHERE acc = ?")
    DataStore.get_entry("crossref").define_query(
        "uniprot_cross_acc",
        "SELECT acc FROM uniprot WHERE db = ? AND cross = ?")
    DataStore.get_entry("crossref").define_query(
        "uniprot_cross_cross",
        "SELECT t2.cross FROM uniprot AS t1 JOIN uniprot AS t2 ON acc WHERE t1.db = ? AND t1.cross = ? AND t2.db = ?"
    )
    DataStore.get_entry("crossref").define_index("uniprot_acc", "uniprot",
                                                 ["acc"], False)
    DataStore.get_entry("crossref").define_index("uniprot_acc_db", "uniprot",
                                                 ["acc", "db"], False)
    DataStore.get_entry("crossref").define_index("uniprot_db_cross", "uniprot",
                                                 ["db", "cross"], False)

    # Populate database
    populate_database()
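
The queries defined above can be exercised with the exec_query pattern used elsewhere in these examples. A minimal sketch, assuming the returned cursor-like object supports fetchall(); the helper name and accession are hypothetical.

def get_crossrefs(acc):
    # List every (db, cross) pair recorded for a UniProt accession,
    # using the "uniprot_acc_all" query defined in crossref_init().
    return DataStore.get_entry("crossref").exec_query("uniprot_acc_all", (acc,)).fetchall()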
Example #10
def get_sequence(acc):
    """ Lookup sequence for given acc """
    result = DataStore.get_entry("decluster").exec_query(
        "index-lookup", [acc]).fetchone()
    if not result:
        core.main.send_output(
            "Sequence not found for UniProt accession '{0}'".format(acc),
            "stderr")
        sys.exit(1)

    # Seek the index position in the appropriate file
    handle = FileStore.get_entry(result[0], "decluster-seqs").get_handle()
    handle.seek(result[1])

    # Append sequence data until next header
    ret_seq = ""
    for line in handle:
        if line.startswith(">") and ret_seq:
            break

        ret_seq += line

    return ret_seq
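
Hypothetical usage, mirroring how render_sequences opens the sequence files before calling get_sequence; the accession is a placeholder and decluster_init() must already have been called so the FileStore and DataStore entries exist.

# Open each cached sequence file, then print one FASTA record to stdout
for entry in FileStore.get_group("decluster-seqs"):
    entry.get_handle("rt")

core.main.send_output(get_sequence("P12345"), "stdout", "")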
Example #11

# Parse arguments
arguments = parse_arguments()

# Connect to plugins
core.main.connect_plugins(arguments.debug)

# Handle non-pipeline actions
pipeline_present = True
if arguments.list:
    pipeline_present = False
    list_plugins()

if pipeline_present:
    pipeline = get_pipeline(arguments.plugins)

    try:
        core.main.exec_pipeline(pipeline)
        # Initialize plugins
        #if core.main.init_plugins(set(list(zip(*pipeline))[0])):
        # Execute pipeline
        #    core.main.exec_pipeline(pipeline)

        # Do a final flush to standard out
        core.main.exec_pipeline([("flush", "")])

    finally:
        # Clean up FileStore
        FileStore.destroy()