def create_log(base_dir, category, content):
    row = {"time": time.time(), "category": category, "content": content}
    with oscar.context(base_dir) as context:
        with oscar.command(context, "load") as command:
            command.add_argument("table", "Log")
            command.add_argument("values", oscar.to_json([row]))
            command.execute()

def set_one(base_dir, config_name, value):
    with oscar.context(base_dir) as context:
        with oscar.command(context, "load") as command:
            command.add_argument("table", "Config")
            # store the value JSON-encoded so get() can json.loads() it,
            # matching put_all() below
            row = {"_key": config_name, "value": oscar.to_json(value)}
            command.add_argument("values", oscar.to_json([row]))
            command.execute()

def put_all(base_dir, configs):
    with oscar.context(base_dir) as context:
        with oscar.command(context, "load") as command:
            command.add_argument("table", "Config")
            rows = map(lambda (x, y): {"_key": x, "value": oscar.to_json(y)},
                       configs.items())
            command.add_argument("values", oscar.to_json(rows))
            command.execute()

def consume(base_dir, limit=100, concurrency=1, id_prefix=None, utf8_check=False):
    with oscar.context(base_dir) as context:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "FileQueue")
            command.add_argument("output_columns", "_id,_key,name")
            if id_prefix:
                command.add_argument(
                    "filter",
                    "_key @^ \"%s\"" % oscar.escape_for_groonga(id_prefix))
            command.add_argument("sortby", "size")
            command.add_argument("limit", str(limit))
            rows = json.loads(command.execute())[0][2:]

    jobs = map(lambda x: (base_dir, x[0], x[2].encode("utf-8"), utf8_check), rows)
    rst = 0  # keep rst defined even if the pool is interrupted below
    if concurrency > 1:
        # have worker processes ignore SIGINT so the parent handles Ctrl-C
        pool = multiprocessing.Pool(
            concurrency, lambda: signal.signal(signal.SIGINT, signal.SIG_IGN))
        try:
            rst = sum(pool.map(add_file, jobs))
            pool.close()
        except KeyboardInterrupt:
            pool.terminate()
        finally:
            pool.join()
    else:
        rst = sum(map(add_file, jobs))
    return rst

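# Illustrative usage sketch (not part of the original module): drain the
# FileQueue in batches until it is empty. "/srv/share" is a hypothetical
# share directory; any base_dir with an initialized database should work.
def example_drain_queue(base_dir="/srv/share"):
    while consume(base_dir, limit=100, concurrency=4) > 0:
        pass
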
def show_one(base_dir, config_name):
    with oscar.context(base_dir) as context:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "Config")
            command.add_argument("filter", "_key == \"%s\"" % config_name)
            rows = json.loads(command.execute())[0][2:]
            if len(rows) > 0:
                print rows[0]

def add_file(args):
    base_dir, _id, filename, utf8_check = args
    with oscar.context(base_dir) as context:
        add.add_file(context, base_dir, filename, utf8_check)
        with oscar.command(context, "delete") as command:
            command.add_argument("table", "FileQueue")
            command.add_argument("id", str(_id))
            command.execute()
    return 1

def process_file_event(share, event_mask, event_pathname):
    if (event_mask & pyinotify.IN_CLOSE_WRITE) or (
            event_mask & pyinotify.IN_MOVED_TO):  # @UndefinedVariable
        oscar.log.debug(u"Adding %s to %s"
                        % (event_pathname.decode("utf-8"), share.name))
        with oscar.context(share.path) as context:
            walk.enqueue(context, share.path, event_pathname)
    elif (event_mask & pyinotify.IN_DELETE) or (
            event_mask & pyinotify.IN_MOVED_FROM):  # @UndefinedVariable
        file_id = oscar.sha1(event_pathname)
        oscar.log.debug(u"Removing %s from %s(%s)"
                        % (event_pathname.decode("utf-8"), share.name, file_id))
        with oscar.context(share.path) as context:
            with oscar.command(context, "delete") as command:
                command.add_argument("table", "FileQueue")
                command.add_argument("key", file_id)
                command.execute()
            with oscar.command(context, "delete") as command:
                command.add_argument("table", "Files")
                command.add_argument("key", file_id)
                command.execute()

def cleanup(context, base_dir):
    offset = 0
    total = 1
    while offset < total:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "Files")
            command.add_argument("output_columns", "_id,path,name")
            command.add_argument("offset", str(offset))
            result = json.loads(command.execute())
        total = result[0][0][0]
        rows = result[0][2:]
        #oscar.log.debug("total:%d offset:%d" % (total, offset))
        if not rows:  # safety guard: never loop forever on an empty page
            break
        for row in rows:
            _id, path, name = row[0], row[1].encode("utf-8"), row[2].encode("utf-8")
            exact_filename = os.path.join(base_dir, path if path != "/" else "", name)
            if not os.path.isfile(exact_filename):
                oscar.log.info("Missing file: %s. Removing from database" % exact_filename)
                with oscar.command(context, "delete") as command:
                    command.add_argument("table", "Files")
                    command.add_argument("id", str(_id))
                    command.execute()
        offset += len(rows)

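# Illustrative usage sketch (assumption: cleanup() is called with an open
# context for the same share it prunes): drop Files rows whose files have
# disappeared from disk.
def example_cleanup_share(base_dir):
    with oscar.context(base_dir) as context:
        cleanup(context, base_dir)
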
def get(base_dir, config_name=None):
    with oscar.context(base_dir) as context:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "Config")
            if config_name:
                command.add_argument("filter", "_key == \"%s\"" % config_name)
            rows = json.loads(command.execute())[0][2:]
    if config_name:
        return json.loads(rows[0][2]) if len(rows) > 0 else None
    # else: return all config values keyed by name
    result = {}
    for row in rows:
        result[row[1]] = json.loads(row[2])
    return result

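# Illustrative usage sketch (assumption: set_one() and get() live in the same
# module): round-trip a config value. The key "scan_interval" is a made-up
# example, not a key the application necessarily defines.
def example_config_roundtrip(base_dir):
    set_one(base_dir, "scan_interval", 300)
    assert get(base_dir, "scan_interval") == 300
    print get(base_dir)  # all config values as a dict
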
def share_info(share_name):
    share = oscar.get_share(share_name)
    if share is None:
        return "Share not found", 404
    path = oscar.remove_preceding_slash(flask.request.args.get("path") or "")
    if not os.path.isdir(share.real_path(path)):
        return "Dir not found", 404
    check_access_credential(share)
    if path != "" and not path.endswith("/"):
        path = path + "/"
    with oscar.context(share.real_path("/")) as context:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "Files")
            if path != "":
                command.add_argument(
                    "filter", "path @^ \"%s\"" % oscar.escape_for_groonga(path))
            command.add_argument("limit", "0")
            count = json.loads(command.execute())[0][0][0]
        with oscar.command(context, "select") as command:
            command.add_argument("table", "FileQueue")
            command.add_argument("limit", "0")
            queued = json.loads(command.execute())[0][0][0]
    return flask.jsonify({"share_name": share_name, "count": count,
                          "queued": queued, "eden": is_eden(flask.request)})

def search(context, path, keyword, offset=None, limit=None):
    path = oscar.remove_preceding_slash(path)
    if path != "" and not path.endswith("/"):
        path = path + "/"
    with oscar.command(context, "select") as command:
        command.add_argument("table", "Files")
        command.add_argument("output_columns",
                             "_key,path,name,mtime,size,fulltext.title,"
                             "snippet_html(path),snippet_html(name),"
                             "snippet_html(fulltext.content)")
        command.add_argument("match_columns",
                             "name*10||fulltext.title*10||fulltext.content*5||path_ft")
        if path != "":
            command.add_argument(
                "filter", "path @^ \"%s\"" % oscar.escape_for_groonga(path))
        command.add_argument("query", keyword)
        command.add_argument("sortby", "-_score")
        command.add_argument("command_version", "2")
        if offset:
            command.add_argument("offset", str(offset))
        if limit:
            command.add_argument("limit", str(limit))
        result = json.loads(command.execute())
    return {
        "count": result[0][0][0],
        "rows": map(lambda row: {"key": row[0], "path": row[1], "name": row[2],
                                 "mtime": row[3], "size": row[4], "title": row[5],
                                 "snippets": {"path": row[6], "name": row[7],
                                              "content": row[8]}},
                    result[0][2:])
    }

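# Illustrative usage sketch: full-text search under a subdirectory and print
# the hits. base_dir, the "docs" subdirectory and the keyword are placeholders.
def example_search(base_dir, keyword=u"report"):
    with oscar.context(base_dir) as context:
        result = search(context, "docs", keyword, limit=10)
    print "%d hit(s)" % result["count"]
    for row in result["rows"]:
        print "%s%s (%d bytes)" % (row["path"], row["name"], row["size"])
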
def get_log(base_dir, category=None, offset=None, limit=None):
    with oscar.context(base_dir) as context:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "Log")
            command.add_argument("output_columns", "time,category,content")
            if category:
                command.add_argument(
                    "filter",
                    "category == \"%s\"" % oscar.escape_for_groonga(category))
            if offset:
                command.add_argument("offset", str(offset))
            if limit:
                command.add_argument("limit", str(limit))
            command.add_argument("sortby", "-time")
            result = json.loads(command.execute())
    return {
        "count": result[0][0][0],
        "rows": map(lambda row: {"time": row[0], "category": row[1],
                                 "content": row[2]},
                    result[0][2:])
    }

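# Illustrative usage sketch (assumption: create_log() above is importable from
# the same place): write a log row, then read the newest entries back.
def example_log_roundtrip(base_dir):
    create_log(base_dir, "extract", u"example entry")
    for row in get_log(base_dir, category="extract", limit=10)["rows"]:
        print "%f [%s] %s" % (row["time"], row["category"], row["content"])
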
def add_file(context, base_dir, filename, utf8_check=False):
    filename = oscar.remove_preceding_slash(filename)
    exact_filename = os.path.join(base_dir, filename)
    if not os.path.isfile(exact_filename):
        oscar.log.error("File %s does not exist" % exact_filename)
        return False
    stat = os.stat(exact_filename)
    if stat.st_size < 50000000:  # 50MB or more is too large to index
        file_hash = calc_file_hash(exact_filename)
        oscar.log.debug("File hash: %s" % file_hash)
        with oscar.command(context, "select") as command:
            command.add_argument("table", "Fulltext")
            command.add_argument("output_columns", "_id")
            command.add_argument("filter", "_key == '%s'" % file_hash)
            num_hits = json.loads(command.execute())[0][0][0]
        if num_hits == 0:  # not registered yet
            extractor = extract.get_extractor(exact_filename)
            if extractor:
                try:
                    title, text = extractor(exact_filename)
                except Exception as e:
                    oscar.log.exception("extractor")
                    log.create_log(
                        base_dir, "extract",
                        u"%s (%s): %s" % (filename.decode("utf-8"), file_hash,
                                          e.message.decode("utf-8")))
                else:
                    if utf8_check:
                        utf8_check_by_iconv(text)
                    # truncate text over 3MB (groonga crashes when asked to
                    # search such documents with snippets)
                    if len(text) > 3000000:
                        text = text.decode("utf-8")[0:1000000].encode("utf-8")
                    row = {"_key": file_hash, "title": title, "content": text}
                    with oscar.command(context, "load") as command:
                        command.add_argument("table", "Fulltext")
                        command.add_argument("values", oscar.to_json([row]))
                        command.execute()
    else:
        oscar.log.debug("%s is too large (%d). the content is ignored"
                        % (filename, stat.st_size))
        file_hash = ""
    # select Files --filter 'name @^ "walbrix"'
    path = os.path.dirname(filename)
    if not path.endswith('/'):
        path += '/'
    row = {"_key": oscar.sha1(filename), "path": path, "path_ft": path,
           "name": os.path.basename(filename), "mtime": stat.st_mtime,
           "size": stat.st_size, "fulltext": file_hash}
    oscar.log.info("Adding: %s" % exact_filename)
    with oscar.command(context, "load") as command:
        command.add_argument("table", "Files")
        command.add_argument("values", oscar.to_json([row]))
        command.execute()
    return True

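# Illustrative usage sketch: index a single file relative to a share root.
# The relative path "docs/readme.txt" is a placeholder; base_dir must contain
# an initialized groonga database.
def example_index_one(base_dir, filename="docs/readme.txt"):
    with oscar.context(base_dir) as context:
        if add_file(context, base_dir, filename, utf8_check=False):
            print "indexed %s" % filename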