コード例 #1
0
    # Set the 'peewee' logger to INFO, then run the project's init_log() setup.
    # (presumably this keeps peewee's per-query debug output out of the log)
    logger = logging.getLogger('peewee')
    logger.setLevel(logging.INFO)
    init_log()

    # Open the "saureus" BioMongoDB and bind the two MySQL handles:
    # tax_db -> 'bioseqdb', mysql_db -> 'sndg'.
    # NOTE(review): the MySQL password is hard-coded in source on the next two
    # calls; consider reading it from configuration or the environment.
    mdb = BioMongoDB("saureus")
    tax_db.initialize(MySQLDatabase('bioseqdb', user='******', passwd="mito"))
    mysql_db.initialize(MySQLDatabase('sndg', user='******', passwd="mito"))
    # Materialize the query into a list up front: every ExternalAssembly row
    # selected by sample_source.is_null(False) (i.e. sample_source is set,
    # assuming peewee's is_null semantics).
    assemblies = list(ExternalAssembly.select().where(
        ExternalAssembly.sample_source.is_null(False)))

    # Connect ProteinAnnotator to the "unipmap" database before any
    # assemblies are processed.
    ProteinAnnotator.connect_to_db(database="unipmap",
                                   user="******",
                                   password="******")
    with tqdm(assemblies) as pbar:
        for x in pbar:
            if mdb.seq_col_exists(x.assembly_accession):
                continue
            pbar.set_description(x.assembly_accession)
            try:
                dst_dir = "/data/organismos/" + x.assembly_accession + "/annotation/"
                mkdir(dst_dir)
                gbpath = x.download_gbk(dst_dir)
                from_ref_seq(x.assembly_accession,
                             gbpath,
                             tax=x.ncbi_tax,
                             tmp_dir=dst_dir)

                tid = int(
                    mdb.db.sequence_collection.find_one(
                        {"name": x.assembly_accession})["tax"]["tid"])
                tmp_dir = "/data/organismos/" + x.assembly_accession + "/annotation/"
コード例 #2
0
    # NOTE(review): no type= is given for --cpus, so a value supplied on the
    # command line would arrive as a string (assuming `parser` is an
    # argparse.ArgumentParser); it is converted with int() after parsing.
    parser.add_argument("--cpus", default=multiprocessing.cpu_count())
    parser.add_argument("-mydbunip", "--mysqldbunip", default="unipmap")
    parser.add_argument("-myu", "--mysqldbuser", default="root")

    args = parser.parse_args()
    # Normalize cpus to an int whether it came from the default or the CLI.
    args.cpus = int(args.cpus)
    # Open the Mongo-backed store and the MySQL connections using the
    # parsed arguments; the same user/password pair is used for both the
    # taxonomy database and the ProteinAnnotator database.
    mdb = BioMongoDB(args.mongodbname)
    tax_db.initialize(
        MySQLDatabase(args.mysqldbtaxname,
                      user=args.mysqldbuser,
                      passwd=args.dbpass))
    ProteinAnnotator.connect_to_db(database=args.mysqldbunip,
                                   user=args.mysqldbuser,
                                   password=args.dbpass)

    # Refuse to continue if a sequence collection for this accession already
    # exists in the Mongo store.
    # NOTE(review): `assert` is removed when Python runs with -O, so this
    # guard would not fire in optimized mode; an explicit raise would be safer.
    assert not mdb.seq_col_exists(
        args.assemblyAccession), "assembly already exists"
    Entrez.email = "*****@*****.**"
    # Search the "assembly" Entrez database for the accession (at most one
    # hit requested) and keep the first returned id.
    # NOTE(review): the [0] index raises IndexError if "IdList" comes back
    # empty, e.g. for an unknown accession — confirm whether that is intended.
    assembly_id = Entrez.read(
        Entrez.esearch(db="assembly", term=args.assemblyAccession,
                       retmax=1))["IdList"][0]
    # Fetch the summary record for that assembly id.
    resource = Entrez.read(
        Entrez.esummary(db="assembly", id=assembly_id, validate=False))
    try:

        data = resource["DocumentSummarySet"]["DocumentSummary"][0]
        name = data["AssemblyName"]
        genome = str(data["SpeciesName"])
        tax = data["Taxid"]
        status = data["AssemblyStatus"]

        ea = ExternalAssembly(type="assembly",