def query(r, d, query_sdf, ref_iddb, embedOnly=False):
    """Run a query file against a reference database and write the results.

    ``query_sdf`` and ``ref_iddb`` are resolved against the current working
    directory, as is the working directory ``run-<r>-<d>``.  Results are
    written to ``<query base name>.out`` in the current working directory.

    When ``embedOnly`` is true, or the query file holds more than one
    compound, the work is delegated to ``batchQuery``.  Otherwise the single
    query goes through ``solvePuzzle`` -> ``lshSearch`` -> ``refine`` and one
    ``"<name> <distance>"`` line is written per refined hit.

    Parameters:
        r, d: passed through to the helpers; also used to build the names
            ``run-<r>-<d>`` and ``matrix.<r>-<d>``.
        query_sdf: path of the query file, relative to the current directory.
        ref_iddb: path of the reference file, relative to the current
            directory; ``<ref_iddb>.distmat.coord`` is used as the coordinate
            file.
        embedOnly: forces the batch branch and is forwarded to ``batchQuery``.

    Returns:
        None.

    Raises:
        StandardError: if the query file, the reference file or the working
            directory does not exist.

    Any other ``Exception`` raised while processing is reported with
    ``print_exc()`` and not re-raised.  Whatever happens, the original
    working directory is restored, the output file is closed and the
    temporary directory is removed.
    """
    from shutil import rmtree

    current_dir = os.path.abspath(".")
    query_sdf = os.path.join(current_dir, query_sdf)
    ref_iddb = os.path.join(current_dir, ref_iddb)
    work_dir = os.path.join(current_dir, 'run-%s-%s' % (r, d))

    if not os.path.isfile(query_sdf):
        raise StandardError("query file " + query_sdf + " not found")
    if not os.path.isfile(ref_iddb):
        raise StandardError("reference file " + ref_iddb + " not found")
    if not os.path.isdir(work_dir):
        raise StandardError("working directory " + work_dir + " not found")

    temp_dir = mkdtemp()
    query_base = os.path.splitext(os.path.basename(query_sdf))[0]
    query_cdb = os.path.join(temp_dir, query_base + ".cdb")
    matrix_file = os.path.join(work_dir, "matrix.%s-%s" % (r, d))
    coord_file = ref_iddb + ".distmat.coord"

    out = None
    try:
        try:
            # TODO move this to non-batch branch and check
            _, num_compounds = createQueryCdb(query_sdf, query_cdb)
            info("found %s compounds" % num_compounds)
            ref_db = gen_subdb(ref_iddb, None, DB_SUBSET, CDB)

            names_file = open(CDB + ".names")
            try:
                names = [line.strip() for line in names_file]
            finally:
                names_file.close()

            # Opened before the chdir below, so the relative name resolves
            # against the caller's directory rather than the temp directory.
            out = open(query_base + ".out", 'w')
            os.chdir(temp_dir)

            if embedOnly or num_compounds > 1:
                batchQuery(out, r, d, ref_db, query_sdf, coord_file,
                           matrix_file, names, embedOnly)
            else:
                puzzle_file = os.path.join(temp_dir, "puzzle")
                refineResult = refine(
                    QueryFile(query_cdb),
                    lshSearch(
                        matrix_file,
                        solvePuzzle(r, d, ref_db, query_cdb, coord_file,
                                    puzzle_file)))
                for seq_id, dist in refineResult:
                    # seq_id is 1-based; names is a 0-based list.
                    out.write('%s %s\n' % (names[int(seq_id) - 1], dist))
                info("refine result: " + str(refineResult))
        except Exception:
            # Best effort: report the failure but do not propagate it.
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            print_exc()
    finally:
        # Runs on success, on a reported failure and on interrupts alike, so
        # the process never stays inside temp_dir and nothing is leaked.
        if out is not None:
            out.close()
        os.chdir(current_dir)
        # warning("NOT CLEANING UP")
        rmtree(temp_dir, ignore_errors=True)
def query(r, d, query_sdf, ref_iddb, embedOnly=False):
    """Run a query file against a reference database and write the results.

    ``query_sdf`` and ``ref_iddb`` are resolved against the current working
    directory, as is the working directory ``run-<r>-<d>``.  Results are
    written to ``<query base name>.out`` in the current working directory.

    When ``embedOnly`` is true, or the query file holds more than one
    compound, the work is delegated to ``batchQuery``.  Otherwise the single
    query goes through ``solvePuzzle`` -> ``lshSearch`` -> ``refine`` and one
    ``"<name> <distance>"`` line is written per refined hit.

    Parameters:
        r, d: passed through to the helpers; also used to build the names
            ``run-<r>-<d>`` and ``matrix.<r>-<d>``.
        query_sdf: path of the query file, relative to the current directory.
        ref_iddb: path of the reference file, relative to the current
            directory; ``<ref_iddb>.distmat.coord`` is used as the coordinate
            file.
        embedOnly: forces the batch branch and is forwarded to ``batchQuery``.

    Returns:
        None.

    Raises:
        StandardError: if the query file, the reference file or the working
            directory does not exist.

    Any other ``Exception`` raised while processing is reported with
    ``print_exc()`` and not re-raised.  Whatever happens, the original
    working directory is restored, the output file is closed and the
    temporary directory is removed.
    """
    from shutil import rmtree

    current_dir = os.path.abspath(".")
    query_sdf = os.path.join(current_dir, query_sdf)
    ref_iddb = os.path.join(current_dir, ref_iddb)
    work_dir = os.path.join(current_dir, 'run-%s-%s' % (r, d))

    if not os.path.isfile(query_sdf):
        raise StandardError("query file " + query_sdf + " not found")
    if not os.path.isfile(ref_iddb):
        raise StandardError("reference file " + ref_iddb + " not found")
    if not os.path.isdir(work_dir):
        raise StandardError("working directory " + work_dir + " not found")

    temp_dir = mkdtemp()
    query_base = os.path.splitext(os.path.basename(query_sdf))[0]
    query_cdb = os.path.join(temp_dir, query_base + ".cdb")
    matrix_file = os.path.join(work_dir, "matrix.%s-%s" % (r, d))
    coord_file = ref_iddb + ".distmat.coord"

    out = None
    try:
        try:
            # TODO move this to non-batch branch and check
            _, num_compounds = createQueryCdb(query_sdf, query_cdb)
            info("found %s compounds" % num_compounds)
            ref_db = gen_subdb(ref_iddb, None, DB_SUBSET, CDB)

            names_file = open(CDB + ".names")
            try:
                names = [line.strip() for line in names_file]
            finally:
                names_file.close()

            # Opened before the chdir below, so the relative name resolves
            # against the caller's directory rather than the temp directory.
            out = open(query_base + ".out", 'w')
            os.chdir(temp_dir)

            if embedOnly or num_compounds > 1:
                batchQuery(out, r, d, ref_db, query_sdf, coord_file,
                           matrix_file, names, embedOnly)
            else:
                puzzle_file = os.path.join(temp_dir, "puzzle")
                refineResult = refine(
                    QueryFile(query_cdb),
                    lshSearch(
                        matrix_file,
                        solvePuzzle(r, d, ref_db, query_cdb, coord_file,
                                    puzzle_file)))
                for seq_id, dist in refineResult:
                    # seq_id is 1-based; names is a 0-based list.
                    out.write('%s %s\n' % (names[int(seq_id) - 1], dist))
                info("refine result: " + str(refineResult))
        except Exception:
            # Best effort: report the failure but do not propagate it.
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            print_exc()
    finally:
        # Runs on success, on a reported failure and on interrupts alike, so
        # the process never stays inside temp_dir and nothing is leaked.
        if out is not None:
            out.close()
        os.chdir(current_dir)
        # warning("NOT CLEANING UP")
        rmtree(temp_dir, ignore_errors=True)
sys.stderr.write("Invalid reference coordinate file: no such file.") sys.stderr.write("Missing:\n") sys.stderr.write(ref_coord) sys.exit(1) f = file(ref_coord) d = len(f.readline().split()) for i, _ in enumerate(f): pass r = 2 + i sys.stderr.write("r = %d d = %d\n" % (r, d)) db_builder = DB_BUILDER db2db_distance = DB2DB_DISTANCE if opts.m: db_builder += ('.' + opts.m) db2db_distance += ('.' + opts.m) # this assumes absolute paths, which we don't need to have #if not os.path.isfile(db_builder): #sys.stderr.write("Cannot find database builder to parse your input") #sys.stderr.write("\nI cannot find: ") #sys.stderr.write(db_builder) #sys.exit(1) ref_real_db = gen_subdb(ref_db_path, opts.m,DB_SUBSET,CDB) print embed(args[0], r, d, ref_real_db, ref_coord, db_builder, db2db_distance) # vim:tw=78:ts=4:sw=4:expandtab
sys.stderr.write(ref_coord) sys.exit(1) f = file(ref_coord) d = len(f.readline().split()) for i, _ in enumerate(f): pass r = 2 + i sys.stderr.write("r = %d d = %d\n" % (r, d)) db_builder = DB_BUILDER db2db_distance = DB2DB_DISTANCE if opts.m: db_builder += ('.' + opts.m) db2db_distance += ('.' + opts.m) # this assumes absolute paths, which we don't need to have #if not os.path.isfile(db_builder): #sys.stderr.write("Cannot find database builder to parse your input") #sys.stderr.write("\nI cannot find: ") #sys.stderr.write(db_builder) #sys.exit(1) ref_real_db = gen_subdb(ref_db_path, opts.m, DB_SUBSET, CDB) print embed(args[0], r, d, ref_real_db, ref_coord, db_builder, db2db_distance) # vim:tw=78:ts=4:sw=4:expandtab