# Batch import script: walks every ExternalAssembly row that has a
# sample_source and processes the ones not yet present in the "saureus"
# BioMongoDB.
#
# NOTE(review): this fragment is truncated. The `try:` at the bottom has no
# visible `except`/`finally`, and `tid` / `tmp_dir` are assigned but not used
# within the visible part -- presumably the rest of the loop body consumes
# them; confirm against the full file.
# NOTE(review): database names, users and passwords are hard-coded literals
# here; consider reading them from configuration or the environment.

# Set the 'peewee' logger to INFO.
logger = logging.getLogger('peewee')
logger.setLevel(logging.INFO)
init_log()

# Open the Mongo-backed store and bind the two MySQL-backed database handles.
mdb = BioMongoDB("saureus")
tax_db.initialize(MySQLDatabase('bioseqdb', user='******', passwd="mito"))
mysql_db.initialize(MySQLDatabase('sndg', user='******', passwd="mito"))

# Materialise the query up front so tqdm receives a list rather than a query
# object. `is_null(False)` keeps only rows whose sample_source is NOT NULL.
assemblies = list(ExternalAssembly.select().where(
    ExternalAssembly.sample_source.is_null(False)))
ProteinAnnotator.connect_to_db(
    database="unipmap", user="******", password="******")

with tqdm(assemblies) as pbar:
    for x in pbar:
        # Skip accessions for which seq_col_exists() already reports True
        # (presumably: already imported -- verify against BioMongoDB).
        if mdb.seq_col_exists(x.assembly_accession):
            continue
        # Show the accession currently being processed on the progress bar.
        pbar.set_description(x.assembly_accession)
        try:
            # Per-assembly working directory for the downloaded GenBank file.
            dst_dir = "/data/organismos/" + x.assembly_accession + "/annotation/"
            mkdir(dst_dir)
            gbpath = x.download_gbk(dst_dir)
            from_ref_seq(
                x.assembly_accession, gbpath, tax=x.ncbi_tax, tmp_dir=dst_dir)
            # Read back the taxonomy id from the sequence_collection document
            # whose "name" is this accession.
            # NOTE(review): find_one() returns None when nothing matches, in
            # which case the subscript below raises TypeError.
            tid = int(
                mdb.db.sequence_collection.find_one(
                    {"name": x.assembly_accession})["tax"]["tid"])
            # Same path expression as dst_dir above.
            tmp_dir = "/data/organismos/" + x.assembly_accession + "/annotation/"
# Tail of the command-line setup plus the start of a single-assembly import.
#
# NOTE(review): this fragment is truncated at both ends. `parser` and the
# arguments behind `args.mongodbname`, `args.mysqldbtaxname`, `args.dbpass`
# and `args.assemblyAccession` are defined before the visible part, and the
# final `ExternalAssembly(` call continues past it.

# No `type=` is given, so a value passed on the command line arrives as a str
# while the default is an int; the int() call below normalises both cases.
parser.add_argument("--cpus", default=multiprocessing.cpu_count())
parser.add_argument("-mydbunip", "--mysqldbunip", default="unipmap")
parser.add_argument("-myu", "--mysqldbuser", default="root")
args = parser.parse_args()
args.cpus = int(args.cpus)

# Open the Mongo-backed store and the MySQL-backed handles using the CLI
# settings.
mdb = BioMongoDB(args.mongodbname)
tax_db.initialize(
    MySQLDatabase(
        args.mysqldbtaxname, user=args.mysqldbuser, passwd=args.dbpass))
ProteinAnnotator.connect_to_db(
    database=args.mysqldbunip, user=args.mysqldbuser, password=args.dbpass)

# Abort when seq_col_exists() reports True for the requested accession.
# NOTE(review): `assert` statements are removed when Python runs with -O, so
# this guard disappears in optimised mode; an explicit raise would be safer.
assert not mdb.seq_col_exists(
    args.assemblyAccession), "assembly already exists"

Entrez.email = "*****@*****.**"
# Search the "assembly" database for the accession, asking for at most one
# hit, and take the first returned id.
# NOTE(review): an empty "IdList" makes the [0] subscript raise IndexError.
assembly_id = Entrez.read(
    Entrez.esearch(
        db="assembly", term=args.assemblyAccession, retmax=1))["IdList"][0]
# Fetch the summary record for that id.
resource = Entrez.read(
    Entrez.esummary(db="assembly", id=assembly_id, validate=False))
try:
    # Pull the fields of interest from the first document summary.
    data = resource["DocumentSummarySet"]["DocumentSummary"][0]
    name = data["AssemblyName"]
    genome = str(data["SpeciesName"])
    tax = data["Taxid"]
    status = data["AssemblyStatus"]
    ea = ExternalAssembly(type="assembly",