def perform_walk():
    """Walk every known share path, logging and skipping any that fail with IOError."""
    for share_path in get_path_map():
        try:
            with oscar.context(share_path) as ctx:
                walk.walk(ctx, share_path)
        except IOError:
            # The share may have vanished between listing and walking.
            oscar.log.error("IOError (share deleted, perhaps)")
def update_file(base_dir, uuid, real_path):
    """Hash a file's content and build its index row.

    NOTE(review): this block appears truncated here — the `row` built below is
    presumably loaded into the database further down (not visible in full).
    """
    hasher = hashlib.sha1()
    try:
        with open(real_path, "rb") as afile:
            stat = os.fstat(afile.fileno())
            size = stat.st_size
            mtime = stat.st_mtime
            # Read in blocksize chunks so large files are not slurped at once.
            buf = afile.read(blocksize)
            while len(buf) > 0:
                hasher.update(buf)
                buf = afile.read(blocksize)
    except IOError:  # e.g. the file was deleted at just the wrong moment
        logging.exception("calculating hash")
        with oscar.context(base_dir, oscar.min_free_blocks) as context:
            delete.delete_by_uuid(context, uuid)
        # NOTE(review): a `return` is probably missing here — if the IOError
        # fired before os.fstat() succeeded, `size`/`mtime` below are unbound
        # and the next line raises NameError.  TODO confirm against original.
    row = {"_key":uuid, "size":size, "mtime":mtime, "dirty":False}
    hashval = hasher.hexdigest()
    extracted_content = None
    if fulltext_already_exists(base_dir, hashval):
        #logging.debug("Fulltext already exists %s" % hashval)
        row["fulltext"] = hashval
    else:
        try:
            # Extract fulltext only when the file is no larger than the configured limit.
            if size <= fulltext_max_file_size:
                extracted_content = extract.extract(real_path)
        except Exception, e:  # many file formats are handled; no telling what may be raised
            log.create_log(base_dir, "extract", u"%s (%s): %s" % (real_path.decode("utf-8"), hashval, e.message.decode("utf-8")))
def run(args):
    """Interactively truncate the database under each base_dir in args."""
    for base_dir in args.base_dir:
        answer = raw_input("Are you sure to truncate database at %s? ('yes' if sure): " % base_dir)
        if answer != "yes":
            print("Looks like you're sane.")
            continue
        with oscar.context(base_dir) as context:
            truncate(context)
def set_one(base_dir, config_name, value):
    """Store `value` under key `config_name` in the Config table.

    Bug fix: the row previously used the string literals "config_name" and
    "value" instead of the actual arguments, so every call overwrote one
    bogus record.  The value is JSON-encoded for storage, matching put_all().
    """
    with oscar.context(base_dir) as context:
        with oscar.command(context, "load") as command:
            command.add_argument("table", "Config")
            row = {"_key": config_name, "value": oscar.to_json(value)}
            command.add_argument("values", oscar.to_json([row]))
            command.execute()
def put_all(base_dir, configs):
    """Store every (name, value) pair from the `configs` dict into the Config table."""
    with oscar.context(base_dir) as context:
        with oscar.command(context, "load") as command:
            command.add_argument("table", "Config")
            rows = [{"_key": name, "value": oscar.to_json(val)}
                    for name, val in configs.items()]
            command.add_argument("values", oscar.to_json(rows))
            command.execute()
def consume(base_dir, limit=100, concurrency=1, id_prefix=None, utf8_check=False):
    """Pop up to `limit` entries from FileQueue (smallest first) and index them.

    When concurrency > 1 the work is fanned out to a multiprocessing pool
    whose workers ignore SIGINT so Ctrl-C is handled in the parent.

    Bug fix: `rst` was unbound when the pool was interrupted by
    KeyboardInterrupt, so the final `return rst` raised NameError; it is now
    initialised to 0.

    Returns the number of files processed.
    """
    with oscar.context(base_dir) as context:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "FileQueue")
            command.add_argument("output_columns", "_id,_key,name")
            if id_prefix:
                command.add_argument(
                    "filter", "_key @^ \"%s\"" % oscar.escape_for_groonga(id_prefix))
            command.add_argument("sortby", "size")
            command.add_argument("limit", str(limit))
            rows = json.loads(command.execute())[0][2:]
    jobs = map(lambda x: (base_dir, x[0], x[2].encode("utf-8"), utf8_check), rows)
    rst = 0  # was previously unbound on KeyboardInterrupt
    if concurrency > 1:
        # Workers ignore SIGINT; the parent handles Ctrl-C and terminates the pool.
        pool = multiprocessing.Pool(
            concurrency, lambda: signal.signal(signal.SIGINT, signal.SIG_IGN))
        try:
            rst = sum(pool.map(add_file, jobs))
            pool.close()
        except KeyboardInterrupt:
            pool.terminate()
        finally:
            pool.join()
    else:
        rst = sum(map(add_file, jobs))
    return rst
def exec_search(share_name):
    """Search a share and return JSON results, flagging rows whose file still exists."""
    share = oscar.get_share(share_name)
    if share is None:
        return "Share not found", 404
    check_access_credential(share)
    path = flask.request.args.get("path") or ""
    q = flask.request.args.get("q")
    offset = int(flask.request.args.get("offset") or "0")
    limit = int(flask.request.args.get("limit") or "20")
    if not q:
        return flask.jsonify({"count":0, "rows":[]})
    start_time = time.clock()
    with oscar.context(share.real_path("/")) as context:
        result = search.search(context, path, q, offset, limit)
        for row in result["rows"]:
            rel = oscar.remove_preceding_slash(row["path"].encode("utf-8"))
            row["exists"] = os.path.exists(
                os.path.join(share.path, rel, row["name"].encode("utf-8")))
    search_time = time.clock() - start_time
    result["q"] = q
    result["time"] = search_time
    return flask.jsonify(result)
def run(args):
    """Run a search from the command line and print every matching row."""
    with oscar.context(args.base_dir) as context:
        result = search(context, args.path, args.keyword)
        print("count=%d" % result["count"])
        for row in result["rows"]:
            snippets = row["snippets"]
            print("%s %s%s %s%s%s" % (row["key"], row["path"], row["name"],
                                      snippets["path"], snippets["name"],
                                      snippets["content"]))
def create_log(base_dir, category, content):
    """Append one timestamped entry to the Log table."""
    entry = {"time": time.time(), "category": category, "content": content}
    with oscar.context(base_dir) as context:
        with oscar.command(context, "load") as command:
            command.add_argument("table", "Log")
            command.add_argument("values", oscar.to_json([entry]))
            command.execute()
def create_log(base_dir, category, content):
    """Append one timestamped entry to the Log table via groonga.load()."""
    entry = {"time": time.time(), "category": category, "content": content}
    with oscar.context(base_dir, oscar.min_free_blocks) as context:
        groonga.load(context, "Log", entry)
def show_one(base_dir, config_name):
    """Print the raw Config row for `config_name`, if present.

    Fix: the key is now escaped before interpolation into the Groonga filter
    expression (consistent with get_log()/consume()); previously a name
    containing a quote could break or inject into the query.
    """
    with oscar.context(base_dir) as context:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "Config")
            command.add_argument("filter",
                                 "_key == \"%s\"" % oscar.escape_for_groonga(config_name))
            rows = json.loads(command.execute())[0][2:]
            if len(rows) > 0:
                print(rows[0])
def add_file(args):
    """Index one queued file, then remove its FileQueue row.  Returns 1.

    `args` is a (base_dir, queue_row_id, filename, utf8_check) tuple so this
    function can be handed directly to Pool.map().
    """
    base_dir, row_id, filename, utf8_check = args
    with oscar.context(base_dir) as context:
        add.add_file(context, base_dir, filename, utf8_check)
        with oscar.command(context, "delete") as command:
            command.add_argument("table", "FileQueue")
            command.add_argument("id", str(row_id))
            command.execute()
    return 1
def run(args):
    """Register every base_dir as a share, then start the web application."""
    registry = oscar.ShareRegistry()
    for base_dir in args.base_dir:
        # Opening a context just validates that the database exists.
        with oscar.context(base_dir) as context:
            pass
        share_name = os.path.basename(oscar.remove_trailing_slash(base_dir))
        registry.register_share(oscar.Share(share_name, base_dir))
    oscar.set_share_registry(registry)
    oscar.log.debug("Starting web...")
    app.run(host='0.0.0.0', debug=True)
def run(args):
    """Garbage-collect each base_dir given on the command line."""
    logging.debug("RUN")
    logger.debug("run")
    for base_dir in args.base_dir:
        if os.path.isfile(oscar.get_database_name(base_dir)):
            with oscar.context(base_dir, oscar.min_free_blocks) as context:
                gc(context)
        else:
            logger.error("%s is not a proper base_dir" % base_dir)
def init(base_dir_or_context):
    """Initialize the database, accepting either an open context or a base_dir path.

    Refuses to create a database inside a directory tree that already has one.
    """
    if groonga.is_context(base_dir_or_context):
        _init(base_dir_or_context)
        return
    if oscar.discover_basedir(oscar.get_parent_dir(base_dir_or_context)) is not None:
        raise Exception("Directory %s looks like having database already" % base_dir_or_context)
    with oscar.context(base_dir_or_context, oscar.min_free_blocks, create=True) as context:
        _init(context)
def process_file_event(share, event_mask, event_pathname):
    """React to an inotify event on a share.

    Files written or moved in are enqueued for indexing; files deleted or
    moved out are removed from both FileQueue and Files by their key.
    """
    if event_mask & (pyinotify.IN_CLOSE_WRITE | pyinotify.IN_MOVED_TO):  # @UndefinedVariable
        oscar.log.debug(u"Adding %s to %s" % (event_pathname.decode("utf-8"), share.name))
        with oscar.context(share.path) as context:
            walk.enqueue(context, share.path, event_pathname)
    elif event_mask & (pyinotify.IN_DELETE | pyinotify.IN_MOVED_FROM):  # @UndefinedVariable
        file_id = oscar.sha1(event_pathname)
        oscar.log.debug(u"Removing %s from %s(%s)" % (event_pathname.decode("utf-8"), share.name, file_id))
        with oscar.context(share.path) as context:
            # Same key may exist in either table; drop it from both.
            for table in ("FileQueue", "Files"):
                with oscar.command(context, "delete") as command:
                    command.add_argument("table", table)
                    command.add_argument("key", file_id)
                    command.execute()
def get(base_dir, config_name = None):
    """Fetch one config value (JSON-decoded), or all of them as a dict when no name is given."""
    with oscar.context(base_dir) as context:
        with context.command("select") as command:
            command.add_argument("table", "Config")
            if config_name:
                command.add_argument("filter",
                                     "_key == \"%s\"" % command.escape(config_name))
            rows = json.loads(command.execute())[0][2:]
    if config_name:
        if not rows:
            return None
        return json.loads(rows[0][2])
    return dict((row[1], json.loads(row[2])) for row in rows)
def get(base_dir, config_name = None):
    """Fetch one config value (JSON-decoded), or all of them as a dict when no name is given.

    Fix: `config_name` is now escaped before being interpolated into the
    Groonga filter expression (consistent with get_log()); previously a name
    containing a quote could break or inject into the query.
    """
    with oscar.context(base_dir) as context:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "Config")
            if config_name:
                command.add_argument("filter",
                                     "_key == \"%s\"" % oscar.escape_for_groonga(config_name))
            rows = json.loads(command.execute())[0][2:]
            if config_name:
                return json.loads(rows[0][2]) if len(rows) > 0 else None
            #else
            result = {}
            for row in rows:
                result[row[1]] = json.loads(row[2])
            return result
def get_log(base_dir, category = None, offset = None, limit = None):
    """Return {"count": total, "rows": [...]} of Log entries, newest first."""
    with oscar.context(base_dir) as context:
        with context.command("select") as command:
            command.add_argument("table", "Log")
            command.add_argument("output_columns", "time,category,content")
            if category:
                command.add_argument("filter", "category == \"%s\"" % command.escape(category))
            if offset:
                command.add_argument("offset", str(offset))
            if limit:
                command.add_argument("limit", str(limit))
            command.add_argument("sortby", "-time")
            result = json.loads(command.execute())
    rows = [{"time": r[0], "category": r[1], "content": r[2]} for r in result[0][2:]]
    return {"count": result[0][0][0], "rows": rows}
def share_info(share_name):
    """Return JSON with the indexed-file count and queue length for a share."""
    share = oscar.get_share(share_name)
    if share is None:
        return "Share not found", 404
    path = oscar.remove_preceding_slash(flask.request.args.get("path") or "")
    if not os.path.isdir(share.real_path(path)):
        return "Dir not found", 404
    check_access_credential(share)
    # Normalise to a trailing slash so the prefix filter matches whole directories.
    if path and not path.endswith("/"):
        path += "/"
    with oscar.context(share.real_path("/")) as context:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "Files")
            if path:
                command.add_argument("filter",
                                     "path @^ \"%s\"" % oscar.escape_for_groonga(path))
            command.add_argument("limit", "0")
            count = json.loads(command.execute())[0][0][0]
        with oscar.command(context, "select") as command:
            command.add_argument("table", "FileQueue")
            command.add_argument("limit", "0")
            queued = json.loads(command.execute())[0][0][0]
    return flask.jsonify({"share_name":share_name,"count":count,"queued":queued,"eden":is_eden(flask.request)})
def get_log(base_dir, category=None, offset=None, limit=None):
    """Return {"count": total, "rows": [...]} of Log entries, newest first."""
    with oscar.context(base_dir) as context:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "Log")
            command.add_argument("output_columns", "time,category,content")
            if category:
                command.add_argument(
                    "filter", "category == \"%s\"" % oscar.escape_for_groonga(category))
            if offset:
                command.add_argument("offset", str(offset))
            if limit:
                command.add_argument("limit", str(limit))
            command.add_argument("sortby", "-time")
            result = json.loads(command.execute())
    rows = [{"time": r[0], "category": r[1], "content": r[2]} for r in result[0][2:]]
    return {"count": result[0][0][0], "rows": rows}
def put(base_dir, config_name, value):
    """JSON-encode `value` and store it under `config_name` in the Config table."""
    row = {"_key": config_name, "value": oscar.to_json(value)}
    with oscar.context(base_dir, oscar.min_free_blocks) as context:
        groonga.load(context, "Config", row)
def fulltext_already_exists(base_dir, hashval):
    """True when a Fulltext record keyed by this hash is already stored."""
    with oscar.context(base_dir) as context:
        existing = groonga.get(context, "Fulltext", hashval, "_key")
    return existing is not None
# NOTE(review): fragment — everything down to the groonga.load() calls is the
# tail of update_file() above (its enclosing indentation was lost in
# extraction), and _update() below is truncated mid-loop.  Comments only;
# logic untouched.
row = {"_key":uuid, "size":size, "mtime":mtime, "dirty":False}
hashval = hasher.hexdigest()
extracted_content = None
if fulltext_already_exists(base_dir, hashval):
    #logging.debug("Fulltext already exists %s" % hashval)
    row["fulltext"] = hashval
else:
    try:
        # Extract fulltext only when the file is no larger than the configured limit.
        if size <= fulltext_max_file_size:
            extracted_content = extract.extract(real_path)
    except Exception, e:  # many file formats are handled; no telling what may be raised
        log.create_log(base_dir, "extract", u"%s (%s): %s" % (real_path.decode("utf-8"), hashval, e.message.decode("utf-8")))
with oscar.context(base_dir, oscar.min_free_blocks) as context:
    if extracted_content:
        title, content = extracted_content
        groonga.load(context, "Fulltext", {"_key":hashval, "title":title, "content":content})
        row["fulltext"] = hashval
    groonga.load(context, "Entries", row)

def _update(base_dir, context, concurrency = 1, limit = 1000):
    # Collect up to `limit` dirty Entries rows for re-processing.
    files_to_update = []
    total, rows = groonga.select(context, "Entries", output_columns="_key,parent,size", filter="dirty", limit=limit)
    if len(rows) == 0:
        return
    for row in rows:
        uuid, parent, size = row
        if parent == "":  # records whose parent is "" are orphans and are unconditionally deleted
def add(base_dir, name, context = None):
    """Add `name` under base_dir, opening a temporary context unless one is supplied."""
    if not context:
        with oscar.context(base_dir, oscar.min_free_blocks) as ctx:
            return _add(base_dir, name, ctx)
    return _add(base_dir, name, context)
def truncate(base_dir_or_context, truncate_fulltext = False, truncate_log = False):
    """Truncate tables, accepting either an open context or a base_dir path."""
    if not groonga.is_context(base_dir_or_context):
        # Treat the argument as a base_dir and open a context for it.
        with oscar.context(base_dir_or_context) as context:
            return _truncate(context, truncate_fulltext, truncate_log)
    return _truncate(base_dir_or_context, truncate_fulltext, truncate_log)
def add_by_real_path(file):
    """Add a file given its absolute path by first locating its base_dir.

    Bug fix: the share-relative name is the part of the path *after* the
    base_dir prefix, i.e. file[len(base_dir):]; the previous slice
    file[:len(base_dir)] passed the base_dir prefix itself as the name.
    """
    base_dir = oscar.discover_basedir(file)
    with oscar.context(base_dir, oscar.min_free_blocks) as context:
        add(base_dir, file[len(base_dir):], context)
def walk(base_dir, context = None):
    """Walk base_dir, opening a temporary context when none is supplied."""
    if not context:
        with oscar.context(base_dir, oscar.min_free_blocks) as ctx:
            _walk(base_dir, ctx)
    else:
        _walk(base_dir, context)
def search(base_dir_or_context, path, query=None, offset=None, limit=None, dirty=None):
    """Run a search, accepting either an open context or a base_dir path."""
    if not groonga.is_context(base_dir_or_context):
        with oscar.context(base_dir_or_context) as context:
            return _search(context, path, query, offset, limit, dirty)
    return _search(base_dir_or_context, path, query, offset, limit, dirty)
def init(base_dir):
    """Create the database under base_dir and build its tables.

    Bug fix: `True` was previously passed positionally as the second
    argument, which lands on the min_free_blocks parameter rather than
    `create` (the other init() variant in this project calls
    oscar.context(path, oscar.min_free_blocks, create=True)).
    """
    with oscar.context(base_dir, oscar.min_free_blocks, create=True) as context:
        create_table(context)
def update(base_dir, context = None, concurrency = 1, limit = 1000):
    """Re-process dirty entries, opening a temporary context when none is given."""
    if not context:
        with oscar.context(base_dir, oscar.min_free_blocks) as ctx:
            _update(base_dir, ctx, concurrency, limit)
    else:
        _update(base_dir, context, concurrency, limit)
def run(args):
    """Add each file named on the command line to the index."""
    with oscar.context(args.base_dir) as context:
        for filename in args.args:
            add_file(context, args.base_dir, filename, args.utf8_check)
        oscar.log.info("Files added.")
def perform_search():
    """Run a fixed sample query against the module-level base_dir and return the result."""
    with oscar.context(base_dir) as context:
        hits = search.search(context, "", "公募", 0, 10)
        return hits
def put_all(base_dir, configs):
    """Store every (name, value) pair from the `configs` dict into the Config table."""
    rows = [{"_key": name, "value": oscar.to_json(val)} for name, val in configs.items()]
    with oscar.context(base_dir, oscar.min_free_blocks) as context:
        groonga.load(context, "Config", rows)
def show_one(base_dir, config_name):
    """Print the stored Config value for `config_name`."""
    with oscar.context(base_dir) as context:
        value = groonga.get(context, "Config", config_name)
        print(value)
def set_one(base_dir, config_name, value):
    """Store `value` under key `config_name` in the Config table.

    Bug fix: the row previously used the string literals "config_name" and
    "value" instead of the actual arguments; the value is now also
    JSON-encoded, matching put() / put_all().
    """
    with oscar.context(base_dir, oscar.min_free_blocks) as context:
        groonga.load(context, "Config",
                     {"_key": config_name, "value": oscar.to_json(value)})
def delete_by_real_path(file):
    """Delete a file's index entry given its absolute path.

    Bug fix: the share-relative name is the part of the path *after* the
    base_dir prefix, i.e. file[len(base_dir):]; the previous slice
    file[:len(base_dir)] passed the base_dir prefix itself as the name.
    """
    base_dir = oscar.discover_basedir(file)
    with oscar.context(base_dir, oscar.min_free_blocks) as context:
        return delete(base_dir, file[len(base_dir):], context)
def run(args):
    """Clean up every base_dir given on the command line.

    Bug fix: the loop previously did `return cleanup(...)` on the first
    iteration, so only the first base_dir was ever cleaned; now every
    base_dir is processed (consistent with the other CLI run() handlers,
    which return None).
    """
    for base_dir in args.base_dir:
        with oscar.context(base_dir) as context:
            cleanup(context, base_dir)