# Fragment: builds clusters of PDB-derived sequence segments (CD-HIT, 95%
# identity) and collects the crystal IDs belonging to each cluster.
# NOTE(review): this chunk is truncated — update_clusters() continues
# beyond what is visible here.
from Bio.PDB.PDBParser import PDBParser
from mongoengine.errors import DoesNotExist
from SNDG import init_log
from SNDG.BioMongo.Model import BioProperties
from SNDG.BioMongo.Model import Cluster
from SNDG.BioMongo.Model.Structure import ExperimentalStructure, ResidueSet
from SNDG.BioMongo.Process.BioMongoDB import BioMongoDB
from SNDG.Structure.CompoundTypes import get_compound_type
from SNDG.Structure.FPocket import FPocket
from SNDG.Structure.PDBs import PDBs
# from Bia.Programs.Cluster.CDHit import CDHit

init_log()
# NOTE(review): `logging` is used here but not imported in this chunk —
# presumably imported above; confirm, otherwise this raises NameError.
_log = logging.getLogger(__name__)

from SNDG.Structure.CompoundTypes import compound_type


def update_clusters():
    # Iterate CD-HIT clusters over the 95%-identity PDB sequence set.
    # NOTE(review): the CDHit import above is commented out, so as this
    # chunk stands the call below would raise NameError — confirm where
    # CDHit actually comes from.
    for cluster_name, seqs in CDHit().clustered_seq_iterator("/data/databases/pdb/processed/seqs_from_pdb95.fasta"):
        _log.debug(cluster_name)
        cristals = []
        cluster = Cluster(name=cluster_name, type="PDB_Segments_95")
        for seq in seqs:
            # Each entry is (seq_id, seq_start, seq_end, clust_start, clust_end);
            # seq_id encodes "<pdb>_<chain>_<start>_<end>".
            seq_id, seq_start, seq_end, clust_start, clust_end = seq
            pdb, chain, start, end = seq_id.split("_")
            cristals.append(pdb)
            # ... (loop body truncated in this view)
#!/usr/bin/env python
"""Bootstrap a local PDB mirror and build a BLAST protein database from it.

Downloads the wwPDB entries index, syncs PDB sequence/structure files via
the project's PDBs helper, and runs makeblastdb over the extracted
sequences.
"""
import os
import time

from SNDG import mkdir, execute, execute_from, init_log
from SNDG.WebServices import download_file
from SNDG.Structure.PDBs import PDBs

init_log("/tmp/createdb.log")


def old_or_inexistent(filepath, period=30):
    """Return True if *filepath* is missing or stale.

    :param filepath: path to check.
    :param period: maximum age in days before the file counts as stale.

    Fix: the original used os.path.getatime (last *access* time), which is
    unreliable for freshness checks — many filesystems are mounted with
    relatime/noatime, so atime may never update. Modification time is the
    correct signal for "needs re-download".
    """
    if not os.path.exists(filepath):
        return True
    age_days = (time.time() - os.path.getmtime(filepath)) / 60 / 60 / 24
    return age_days > period


# os.environ["http_proxy"] = "http://proxy.fcen.uba.ar:8080"
# os.environ["ftp_proxy"] = "http://proxy.fcen.uba.ar:8080"

mkdir("/data/pdb/")
# NOTE: "ovewrite" (sic) matches the download_file keyword as declared in
# the project API — do not "fix" the spelling here without changing the API.
download_file("ftp://ftp.wwpdb.org/pub/pdb/derived_data/index/entries.idx",
              "/data/pdb/entries.idx", ovewrite=True)
pdbs = PDBs("/data/pdb/")
pdbs.download_pdb_seq_ses()
pdbs.update_pdb_dir()
mkdir("/data/pdb/processed/")
pdbs.pdbs_seq_for_modelling()
execute("makeblastdb -dbtype prot -in /data/pdb/processed/seqs_from_pdb.fasta")
# Fragment: tail of a Deployer method (the enclosing `def` is above this
# chunk) followed by the script entry point.
# NOTE(review): `tmp_dir`, `seq_col_name`, `Protein`, `SeqCollection`,
# `bpio`, `SeqRecord`, `Seq`, `self`, `_log`, `Deployer`, `connect_to_db`,
# `tax_db`, `MySQLDatabase` and `logging` are all defined/imported outside
# this chunk — confirm against the full file.
protein_fasta = tmp_dir + "/proteins.fasta"
# Regenerate the proteome FASTA only when it is missing or empty.
if not os.path.exists(protein_fasta) or (not os.path.getsize(protein_fasta)):
    with open(protein_fasta, "w") as h:
        for p in Protein.objects(organism=seq_col_name).no_cache():
            bpio.write(SeqRecord(id=p.gene[0], seq=Seq(p.seq)), h, "fasta")
genome = SeqCollection.objects(name=seq_col_name).get()
genome.ncbi_assembly = seq_col_name
# Build indexes/statistics only when missing (idempotent re-runs).
# NOTE(review): original indentation lost in this view — presumably both
# calls are under the `if`; verify.
if not genome.statistics:
    self.mdb.index_seq_collection(seq_col_name, pathways=False)
    self.mdb.build_statistics(seq_col_name)
_log.info("Sequence collection %s created correctly " % seq_col_name)

if __name__ == '__main__':
    init_log("/tmp/sndg2.log")
    # Silence peewee query logging.
    logger = logging.getLogger('peewee')
    logger.setLevel(logging.ERROR)
    dep = Deployer()
    connect_to_db(password="******")  # credential redacted in source
    import mysql.connector
    dep.mdb = "saureus"
    # dep.annotation_tax = "158879"
    dep.init()
    tax_db.initialize(MySQLDatabase('bioseq', user='******', passwd="mito"))
@author: eze
'''
# Fragment: CLI entry point that loads PDB-derived data into a MySQL
# "pdbdb" database via peewee.
# NOTE(review): the module docstring opens above this chunk, and the
# MySQLDatabase(...) call at the end is truncated — its remaining keyword
# arguments (user/password/etc.) continue beyond this view.
import logging
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import math

from Bio.PDB.PDBParser import PDBParser
from tqdm import tqdm

from SNDG import init_log
from SNDG.Structure.PDBdb import *
from SNDG.Structure.PDBs import PDBs

init_log(rootloglevel=logging.INFO)
_log = logging.getLogger(__name__)

if __name__ == "__main__":
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-p", "--dbpass", required=True)
    parser.add_argument("-i", "--pdb_dir", default="/data/databases/pdb/")
    parser.add_argument("-db", "--dbname", default="pdbdb")
    parser.add_argument("-u", "--dbuser", default="root")
    args = parser.parse_args()

    from peewee import MySQLDatabase
    mysql_db = MySQLDatabase(args.dbname,
    # ... (call truncated in this view)
# Fragment: validation script for the "tdr" Mongo database (checks that
# annotated genomes/proteins carry the expected druggability parameters).
# Python 2 syntax — note the `print` statement below.
# NOTE(review): truncated — the db.proteins.count({ call at the end
# continues beyond this chunk.
import logging
import pymongo
from mongoengine.connection import connect
from pymongo.mongo_client import MongoClient

from SNDG import init_log
from SNDG.BioMongo.Model.SeqCollection import Genome, SeqColDruggabilityParam
from SNDG.BioMongo.Process.BioCyc2Mongo import BioCyc
from SNDG.BioMongo.Process.StructureAnotator import StructureAnotator
from SNDG.BioMongo.Process.StructureIndexer import StructuromeIndexer
from SNDG.BioMongo.Process.PathwaysAnnotator import PathwaysAnnotator
from SNDG.BioMongo.Process.BioMongoDB import BioMongoDB
from pprint import pprint

init_log("/tmp/validate.log")
_log = logging.getLogger(__name__)

db = MongoClient().tdr

import re
regx_go = re.compile("^go:")  # , re.IGNORECASE)
regx_ec = re.compile("^ec:")


def validate_pathways_protein(g):
    # Check that the genome document has every druggability search
    # parameter defined by BioCyc.
    for dp in BioCyc.pathways_search_params:
        if not g.has_druggability_param(dp[0]):
            # Spanish message: "%s lacks the attribute %s".
            print "%s no tiene el atributo %s" % (g.name, dp[0])

    # NOTE(review): indentation lost in this view — unclear whether this
    # count belongs inside the function; verify against the full file.
    count = db.proteins.count({
    # ... (truncated in this view)
"""Run FPocket over every PDB model in a directory, caching results.

For each <model>.pdb a <model>.pdb.json is written; models whose JSON
already exists are skipped, so re-runs resume where they left off.
"""
from SNDG import init_log
from SNDG.Structure.FPocket import FPocket

if __name__ == '__main__':
    # These names were referenced but not imported in this chunk; import
    # them here so the script is self-contained (harmless if they are also
    # imported above).
    import argparse
    import glob
    import logging
    import os
    from tqdm import tqdm

    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--models_directory", required=True)
    parser.add_argument("-l", "--log_path", default=None)
    args = parser.parse_args()

    assert os.path.exists(args.models_directory), "%s does not exists" % args.models_directory
    if not args.log_path:
        args.log_path = args.models_directory + "/pocketome.log"

    init_log(args.log_path, logging.INFO)
    _log = logging.getLogger("pocketome")

    with tqdm(glob.glob(args.models_directory + "/*.pdb")) as pbar:
        for pdb in pbar:
            pbar.set_description(pdb)
            try:
                pocket_data = pdb + ".json"
                if not os.path.exists(pocket_data):
                    fpo = FPocket(pdb)
                    result = fpo.hunt_pockets()
                    result.save(pocket_data)
                    result.delete_dir()
            except Exception as e:
                # Fix: Logger.warn is a deprecated alias of warning(), and
                # the original logged the bare exception with no hint of
                # which model failed — include the PDB path for triage.
                _log.warning("pocket detection failed for %s: %s", pdb, e)
["uniprot", "db", "value"]) }) except IntegrityError: pass if __name__ == '__main__': from SNDG import init_log import logging from SNDG.BioMongo.Process.BioMongoDB import BioMongoDB mysqldb = ProteinAnnotator.connect_to_db(database="unipmap", user="******", password="******") pa = ProteinAnnotator() # pa.connect_to_db(password="******") # pa.create_db() # pa.populate_sql("/data/uniprot/idmapping_filtered.dat", # "/data/uniprot/goa/goa_uniprot_all.gpa") tmpdir = "/tmp/lepto/Lepto-CLM-U50" logging.getLogger("peewee").setLevel(logging.WARN) init_log(log_file_path=tmpdir + "/ann.log") mdb = BioMongoDB("tdr") tax = 1958811 list(Mapping.select().where(Mapping.uniprot == "12")) n = "Lepto-CLM-U50" from SNDG.BioMongo.Process.Importer import update_proteins update_proteins(tmpdir, tmpdir + "/genome.fasta", n, tax, db_init=mysqldb)