Esempio n. 1
0
def perform_walk():
    """Walk every path yielded by ``get_path_map()``.

    A separate ``oscar.context`` is opened for each path and handed to
    ``walk.walk``.  An ``IOError`` raised while handling one path is logged
    and the loop carries on with the next path.
    """
    for share_path in get_path_map():
        try:
            with oscar.context(share_path) as ctx:
                walk.walk(ctx, share_path)
        except IOError:
            # Best effort: a single failing path must not stop the others.
            oscar.log.error("IOError (share deleted, perhaps)")
Esempio n. 2
0
def update_file(base_dir, uuid, real_path):
    """Hash the file at ``real_path`` and prepare its index row.

    The file is read in ``blocksize`` chunks to compute a SHA-1 digest, and
    its size and mtime are taken from the open descriptor.  When the file
    cannot be read, the entry identified by ``uuid`` is deleted and the
    function returns without doing anything else.

    Otherwise a row for ``uuid`` is built and, unless full text for the
    same digest already exists, text extraction is attempted for files no
    larger than ``fulltext_max_file_size``.  Extraction failures are written
    to the "extract" log category instead of being raised.

    :param base_dir: base directory used to open the database context
    :param uuid: key of the entry being refreshed
    :param real_path: filesystem path of the file to hash
    """
    hasher = hashlib.sha1()
    try:
        with open(real_path, "rb") as afile:
            # fstat on the open descriptor so size/mtime describe the very
            # file that is being hashed.
            stat = os.fstat(afile.fileno())
            size = stat.st_size
            mtime = stat.st_mtime
            buf = afile.read(blocksize)
            while len(buf) > 0:
                hasher.update(buf)
                buf = afile.read(blocksize)
    except IOError:  # e.g. the file was deleted at exactly the wrong moment
        logging.exception("calculating hash")
        with oscar.context(base_dir, oscar.min_free_blocks) as context:
            delete.delete_by_uuid(context, uuid)
        # Stop here: ``size``/``mtime`` may be unbound and the digest is
        # incomplete, so continuing would either raise NameError or build a
        # row for the entry that was just deleted.
        return

    row = {"_key":uuid, "size":size, "mtime":mtime, "dirty":False}
    hashval = hasher.hexdigest()

    extracted_content = None
    if fulltext_already_exists(base_dir, hashval):
        # Identical content is already indexed; just reference it.
        row["fulltext"] = hashval
    else:
        try:
            # Extract full text only when the file size is at or below the
            # configured limit.
            if size <= fulltext_max_file_size:
                extracted_content = extract.extract(real_path)
        except Exception as e:
            # Many formats are handled, so there is no telling which
            # exception types an extractor may raise.
            log.create_log(base_dir, "extract", u"%s (%s): %s" % (real_path.decode("utf-8"), hashval, e.message.decode("utf-8")))
Esempio n. 3
0
def run(args):
    """Interactively truncate the database of each directory in ``args.base_dir``.

    The user is prompted once per directory; only the exact answer ``yes``
    triggers ``truncate`` for that directory.
    """
    for base_dir in args.base_dir:
        answer = raw_input("Are you sure to truncate database at %s? ('yes' if sure): " % base_dir)
        if answer != "yes":
            print("Looks like you're sane.")
            continue
        with oscar.context(base_dir) as ctx:
            truncate(ctx)
Esempio n. 4
0
def set_one(base_dir, config_name, value):
    """Store ``value`` under the key ``config_name`` in the ``Config`` table.

    :param base_dir: base directory used to open the database context
    :param config_name: key of the configuration entry
    :param value: value to store; it is written as given
        (NOTE(review): readers of this table appear to ``json.loads`` the
        stored value, so callers presumably pass JSON text - confirm)
    """
    # Use the arguments themselves; the previous code stored the literal
    # strings "config_name" and "value" on every call.
    row = {"_key": config_name, "value": value}
    with oscar.context(base_dir) as context:
        with oscar.command(context, "load") as command:
            command.add_argument("table", "Config")
            command.add_argument("values", oscar.to_json([row]))
            command.execute()
Esempio n. 5
0
def put_all(base_dir, configs):
    """Load every item of ``configs`` into the ``Config`` table.

    :param base_dir: base directory used to open the database context
    :param configs: mapping of configuration key to value; each value is
        serialised with ``oscar.to_json`` before being stored
    """
    with oscar.context(base_dir) as context:
        with oscar.command(context, "load") as command:
            command.add_argument("table", "Config")
            # A comprehension replaces ``lambda (x, y): ...``: tuple
            # parameters are Python 2-only syntax.
            rows = [{"_key": key, "value": oscar.to_json(value)}
                    for key, value in configs.items()]
            command.add_argument("values", oscar.to_json(rows))
            command.execute()
Esempio n. 6
0
def consume(base_dir,
            limit=100,
            concurrency=1,
            id_prefix=None,
            utf8_check=False):
    """Process up to ``limit`` entries of the ``FileQueue`` table.

    Entries are selected sorted by ``size`` (optionally restricted to keys
    starting with ``id_prefix``) and each one is passed to ``add_file``.

    :param base_dir: base directory used to open the database context
    :param limit: maximum number of queue entries to select
    :param concurrency: number of worker processes; values of 1 or less
        run the jobs in the current process
    :param id_prefix: if truthy, only keys with this prefix are selected
    :param utf8_check: forwarded unchanged to ``add_file``
    :returns: sum of the ``add_file`` results, or 0 when a parallel run is
        interrupted with Ctrl-C
    """
    with oscar.context(base_dir) as context:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "FileQueue")
            command.add_argument("output_columns", "_id,_key,name")
            if id_prefix:
                command.add_argument(
                    "filter",
                    "_key @^ \"%s\"" % oscar.escape_for_groonga(id_prefix))
            command.add_argument("sortby", "size")
            command.add_argument("limit", str(limit))
            # The first two items of the result set are not data rows
            # (presumably hit count and column header - confirm).
            rows = json.loads(command.execute())[0][2:]

    # Columns are (_id, _key, name); add_file needs the _id and the name.
    jobs = [(base_dir, row[0], row[2].encode("utf-8"), utf8_check)
            for row in rows]
    if concurrency <= 1:
        return sum(map(add_file, jobs))

    # Default result so an interrupted run returns a number instead of
    # failing on an unbound local.
    rst = 0
    # Workers ignore SIGINT so that only the parent reacts to Ctrl-C.
    pool = multiprocessing.Pool(
        concurrency, lambda: signal.signal(signal.SIGINT, signal.SIG_IGN))
    try:
        rst = sum(pool.map(add_file, jobs))
        pool.close()
    except KeyboardInterrupt:
        pool.terminate()
    except Exception:
        # join() requires close() or terminate() first; without this the
        # join in ``finally`` would fail and hide the real error.
        pool.terminate()
        raise
    finally:
        pool.join()
    return rst
Esempio n. 7
0
def exec_search(share_name):
    """Handle a search request against the share named ``share_name``.

    Query parameters read from ``flask.request.args``: ``path``, ``q``,
    ``offset`` (default 0) and ``limit`` (default 20).

    :returns: ``("Share not found", 404)`` for an unknown share, a 400
        response when ``offset``/``limit`` are not integers, an empty
        result when ``q`` is missing or empty, otherwise the JSON search
        result extended with ``q``, ``time`` and a per-row ``exists`` flag
    """
    share = oscar.get_share(share_name)
    if share is None:
        return "Share not found", 404

    check_access_credential(share)

    path = flask.request.args.get("path") or ""
    q = flask.request.args.get("q")

    try:
        offset = int(flask.request.args.get("offset") or "0")
        limit = int(flask.request.args.get("limit") or "20")
    except ValueError:
        # Client-supplied garbage is a bad request, not a server error.
        return "offset and limit must be integers", 400

    if not q:
        return flask.jsonify({"count":0, "rows":[]})

    # NOTE(review): time.clock() measures CPU time on Unix and was removed
    # in Python 3.8; kept as-is so the reported figure does not change.
    start_time = time.clock()
    with oscar.context(share.real_path("/")) as context:
        result = search.search(context, path, q, offset, limit)
    for row in result["rows"]:
        # Separate name so the request's ``path`` is not overwritten.
        row_dir = oscar.remove_preceding_slash(row["path"].encode("utf-8"))
        row["exists"] = os.path.exists(
            os.path.join(share.path, row_dir, row["name"].encode("utf-8")))
    search_time = time.clock() - start_time
    result["q"] = q
    result["time"] = search_time
    return flask.jsonify(result)
Esempio n. 8
0
def run(args):
    """Search ``args.base_dir`` for ``args.keyword`` under ``args.path`` and print the hits.

    Prints the hit count followed by one line per row containing its key,
    path, name and the path/name/content snippets.
    """
    with oscar.context(args.base_dir) as context:
        result = search(context, args.path, args.keyword)

    # Parenthesised single-argument print behaves the same on Python 2 and
    # is also valid Python 3.
    print("count=%d" % result["count"])
    for row in result["rows"]:
        snippets = row["snippets"]
        print("%s %s%s %s%s%s" % (row["key"], row["path"], row["name"],
                                  snippets["path"], snippets["name"],
                                  snippets["content"]))
Esempio n. 9
0
def create_log(base_dir, category, content):
    """Append one entry to the ``Log`` table.

    The entry records the current time together with the given ``category``
    and ``content``.
    """
    entry = {"time": time.time(), "category": category, "content": content}
    with oscar.context(base_dir) as ctx:
        with oscar.command(ctx, "load") as loader:
            loader.add_argument("table", "Log")
            # The load command takes a JSON array of rows.
            payload = oscar.to_json([entry])
            loader.add_argument("values", payload)
            loader.execute()
Esempio n. 10
0
def create_log(base_dir, category, content):
    """Append one entry to the ``Log`` table via ``groonga.load``.

    The entry records the current time together with the given ``category``
    and ``content``; the context is opened with ``oscar.min_free_blocks``.
    """
    entry = {"time": time.time(), "category": category, "content": content}
    with oscar.context(base_dir, oscar.min_free_blocks) as ctx:
        groonga.load(ctx, "Log", entry)
Esempio n. 11
0
def show_one(base_dir, config_name):
    """Print the ``Config`` row whose key equals ``config_name``, if any.

    Nothing is printed when no row matches.
    """
    with oscar.context(base_dir) as context:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "Config")
            # Escape the key before embedding it in the filter expression,
            # as the other select helpers do, so quotes in the name cannot
            # alter the query.
            command.add_argument(
                "filter",
                "_key == \"%s\"" % oscar.escape_for_groonga(config_name))
            rows = json.loads(command.execute())[0][2:]
    if rows:
        print(rows[0])
Esempio n. 12
0
def add_file(args):
    """Index one queued file and remove it from ``FileQueue``.

    ``args`` is a 4-tuple ``(base_dir, _id, filename, utf8_check)``; a single
    tuple argument lets the function be used directly with ``map``.
    Always returns 1.
    """
    base_dir, queue_id, filename, utf8_check = args
    with oscar.context(base_dir) as ctx:
        add.add_file(ctx, base_dir, filename, utf8_check)
        # The file has been handled, so drop its queue record by id.
        with oscar.command(ctx, "delete") as remover:
            remover.add_argument("table", "FileQueue")
            remover.add_argument("id", str(queue_id))
            remover.execute()
    return 1
Esempio n. 13
0
def run(args):
    """Register every directory in ``args.base_dir`` as a share and start the web app.

    Each base directory is opened once to verify that its database exists,
    then registered under the basename of the directory.  The server
    listens on all interfaces.  Debug mode is off unless ``args`` carries a
    truthy ``debug`` attribute.
    """
    share_registry = oscar.ShareRegistry()
    for base_dir in args.base_dir:
        with oscar.context(base_dir):
            pass  # just check that the database can be opened
        share_name = os.path.basename(oscar.remove_trailing_slash(base_dir))
        share_registry.register_share(oscar.Share(share_name, base_dir))
    oscar.set_share_registry(share_registry)

    oscar.log.debug("Starting web...")
    # Never hard-code debug=True while bound to 0.0.0.0: the interactive
    # debugger lets any client that can reach the port run arbitrary code.
    app.run(host='0.0.0.0', debug=bool(getattr(args, "debug", False)))
Esempio n. 14
0
def run(args):
    """Run ``gc`` on every valid base directory in ``args.base_dir``.

    A directory whose database file (``oscar.get_database_name``) does not
    exist is reported as an error and skipped.
    """
    logging.debug("RUN")
    logger.debug("run")
    for base_dir in args.base_dir:
        if os.path.isfile(oscar.get_database_name(base_dir)):
            with oscar.context(base_dir, oscar.min_free_blocks) as ctx:
                gc(ctx)
        else:
            logger.error("%s is not a proper base_dir" % base_dir)
Esempio n. 15
0
def init(base_dir_or_context):
    """Initialise a database from either an open context or a base directory.

    When given a context, ``_init`` is applied to it directly.  When given a
    directory, initialisation is refused (``Exception``) if
    ``oscar.discover_basedir`` finds an existing base directory starting
    from the parent directory; otherwise a new context is created and
    initialised.
    """
    if groonga.is_context(base_dir_or_context):
        _init(base_dir_or_context)
        return

    parent_dir = oscar.get_parent_dir(base_dir_or_context)
    existing_basedir = oscar.discover_basedir(parent_dir)
    if existing_basedir is not None:
        raise Exception("Directory %s looks like having database already" % base_dir_or_context)

    with oscar.context(base_dir_or_context, oscar.min_free_blocks, create=True) as ctx:
        _init(ctx)
Esempio n. 16
0
def process_file_event(share, event_mask, event_pathname):
    """React to an inotify event for a file inside ``share``.

    A close-after-write or moved-to event enqueues the file via
    ``walk.enqueue``.  A delete or moved-from event removes the record keyed
    by the SHA-1 of the path from both ``FileQueue`` and ``Files``.  Any
    other event is ignored.
    """
    appeared = pyinotify.IN_CLOSE_WRITE | pyinotify.IN_MOVED_TO  # @UndefinedVariable
    vanished = pyinotify.IN_DELETE | pyinotify.IN_MOVED_FROM  # @UndefinedVariable

    if event_mask & appeared:
        oscar.log.debug(u"Adding %s to %s" %
                        (event_pathname.decode("utf-8"), share.name))
        with oscar.context(share.path) as ctx:
            walk.enqueue(ctx, share.path, event_pathname)
        return

    if not (event_mask & vanished):
        return

    file_id = oscar.sha1(event_pathname)
    oscar.log.debug(u"Removing %s from %s(%s)" %
                    (event_pathname.decode("utf-8"), share.name, file_id))
    with oscar.context(share.path) as ctx:
        # Same key in both tables: the pending queue first, then the index.
        for table_name in ("FileQueue", "Files"):
            with oscar.command(ctx, "delete") as remover:
                remover.add_argument("table", table_name)
                remover.add_argument("key", file_id)
                remover.execute()
0
def get(base_dir, config_name = None):
    """Read configuration from the ``Config`` table.

    With a truthy ``config_name`` the decoded value of that entry is
    returned, or ``None`` when it does not exist.  Otherwise a dict mapping
    the second column of every row to its JSON-decoded third column is
    returned.
    """
    with oscar.context(base_dir) as ctx:
        with ctx.command("select") as query:
            query.add_argument("table", "Config")
            if config_name:
                key_filter = "_key == \"%s\"" % query.escape(config_name)
                query.add_argument("filter", key_filter)
            records = json.loads(query.execute())[0][2:]

    if not config_name:
        # Whole table requested: index 1 is used as key, index 2 as value.
        return {record[1]: json.loads(record[2]) for record in records}

    if len(records) > 0:
        return json.loads(records[0][2])
    return None
Esempio n. 18
0
def get(base_dir, config_name = None):
    """Read configuration from the ``Config`` table.

    :param base_dir: base directory used to open the database context
    :param config_name: if truthy, only this entry is looked up
    :returns: the JSON-decoded value of ``config_name`` (``None`` when the
        entry does not exist), or, without ``config_name``, a dict mapping
        the second column of every row to its JSON-decoded third column
    """
    with oscar.context(base_dir) as context:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "Config")
            if config_name:
                # Escape the key before embedding it in the filter, as the
                # other select helpers do, so quotes in the name cannot
                # alter the query.
                command.add_argument(
                    "filter",
                    "_key == \"%s\"" % oscar.escape_for_groonga(config_name))
            rows = json.loads(command.execute())[0][2:]
    if config_name:
        return json.loads(rows[0][2]) if rows else None

    result = {}
    for row in rows:
        result[row[1]] = json.loads(row[2])
    return result
Esempio n. 19
0
def get_log(base_dir, category = None, offset = None, limit = None):
    """Fetch log entries, newest first.

    ``category``, ``offset`` and ``limit`` are applied only when truthy.
    Returns a dict with ``count`` (taken from the head of the result set)
    and ``rows``, a list of ``{"time", "category", "content"}`` dicts.
    """
    with oscar.context(base_dir) as ctx:
        with ctx.command("select") as query:
            query.add_argument("table", "Log")
            query.add_argument("output_columns", "time,category,content")
            if category:
                query.add_argument("filter", "category == \"%s\"" % query.escape(category))
            if offset:
                query.add_argument("offset", str(offset))
            if limit:
                query.add_argument("limit", str(limit))
            query.add_argument("sortby", "-time")
            response = json.loads(query.execute())

    result_set = response[0]
    entries = [
        {"time": record[0], "category": record[1], "content": record[2]}
        for record in result_set[2:]
    ]
    return {"count": result_set[0][0], "rows": entries}
Esempio n. 20
0
def share_info(share_name):
    """Report index statistics for a share, optionally limited to a sub-path.

    Reads the ``path`` query parameter from ``flask.request.args``.

    :returns: ``("Share not found", 404)`` for an unknown share,
        ``("Dir not found", 404)`` when the path is not a directory inside
        the share, otherwise JSON with ``share_name``, ``count`` (files
        whose path starts with the requested path), ``queued`` (size of
        ``FileQueue``) and ``eden``
    """
    share = oscar.get_share(share_name)
    if share is None:
        return "Share not found", 404

    # Check credentials before touching the filesystem, matching the search
    # handler; otherwise the 404 below lets unauthenticated callers probe
    # which directories exist.
    check_access_credential(share)

    path = oscar.remove_preceding_slash(flask.request.args.get("path") or "")
    if not os.path.isdir(share.real_path(path)):
        return "Dir not found", 404

    # The prefix filter expects directory paths with a trailing slash.
    if path != "" and not path.endswith("/"):
        path = path + "/"
    with oscar.context(share.real_path("/")) as context:
        with oscar.command(context, "select") as command:
            command.add_argument("table", "Files")
            if path != "":
                command.add_argument(
                    "filter",
                    "path @^ \"%s\"" % oscar.escape_for_groonga(path))
            # limit 0: only the hit count is needed, not the rows.
            command.add_argument("limit", "0")
            count = json.loads(command.execute())[0][0][0]
        with oscar.command(context, "select") as command:
            command.add_argument("table", "FileQueue")
            command.add_argument("limit", "0")
            queued = json.loads(command.execute())[0][0][0]

    return flask.jsonify({"share_name":share_name,"count":count,"queued":queued,"eden":is_eden(flask.request)})
Esempio n. 21
0
def get_log(base_dir, category=None, offset=None, limit=None):
    """Fetch log entries, newest first.

    ``category``, ``offset`` and ``limit`` are applied only when truthy.
    Returns a dict with ``count`` (taken from the head of the result set)
    and ``rows``, a list of ``{"time", "category", "content"}`` dicts.
    """
    with oscar.context(base_dir) as ctx:
        with oscar.command(ctx, "select") as query:
            query.add_argument("table", "Log")
            query.add_argument("output_columns", "time,category,content")
            if category:
                escaped = oscar.escape_for_groonga(category)
                query.add_argument("filter", "category == \"%s\"" % escaped)
            if offset:
                query.add_argument("offset", str(offset))
            if limit:
                query.add_argument("limit", str(limit))
            query.add_argument("sortby", "-time")
            response = json.loads(query.execute())

    result_set = response[0]
    entries = [
        {"time": record[0], "category": record[1], "content": record[2]}
        for record in result_set[2:]
    ]
    return {"count": result_set[0][0], "rows": entries}
Esempio n. 22
0
def put(base_dir, config_name, value):
    """Store ``value``, serialised with ``oscar.to_json``, under ``config_name`` in ``Config``."""
    with oscar.context(base_dir, oscar.min_free_blocks) as ctx:
        record = {"_key": config_name, "value": oscar.to_json(value)}
        groonga.load(ctx, "Config", record)
Esempio n. 23
0
def fulltext_already_exists(base_dir, hashval):
    """Return True when the ``Fulltext`` table has a record keyed by ``hashval``."""
    with oscar.context(base_dir) as ctx:
        found_key = groonga.get(ctx, "Fulltext", hashval, "_key")
    return found_key is not None
Esempio n. 24
0
    row = {"_key":uuid, "size":size, "mtime":mtime, "dirty":False}
    hashval = hasher.hexdigest()

    extracted_content = None
    if fulltext_already_exists(base_dir, hashval):
        #logging.debug("Fulltext already exists %s" % hashval)
        row["fulltext"] = hashval
    else:
        try:
            if size <= fulltext_max_file_size: # ファイルサイズが規定値以下の場合に限りfulltextをextractする
                extracted_content = extract.extract(real_path)
        except Exception, e: # 多様なフォーマットを扱うためどういう例外が起こるかまるでわからん
            log.create_log(base_dir, "extract", u"%s (%s): %s" % (real_path.decode("utf-8"), hashval, e.message.decode("utf-8")))
    
    with oscar.context(base_dir, oscar.min_free_blocks) as context:
        if extracted_content:
            title, content = extracted_content
            groonga.load(context, "Fulltext", {"_key":hashval, "title":title, "content":content})
            row["fulltext"] = hashval
    
        groonga.load(context, "Entries", row)

def _update(base_dir, context, concurrency = 1, limit = 1000):
    files_to_update = []

    total, rows = groonga.select(context, "Entries", output_columns="_key,parent,size", filter="dirty", limit=limit)
    if len(rows) == 0: return
    for row in rows:
        uuid, parent, size = row
        if parent == "":    # parentが "" なレコードは orphanなので無条件に削除対象となる
Esempio n. 25
0
def add(base_dir, name, context = None):
    """Call ``_add`` for ``name``, opening a context when none is supplied.

    A truthy ``context`` is reused as-is; otherwise a context for
    ``base_dir`` is opened with ``oscar.min_free_blocks`` for the duration of
    the call.  Returns whatever ``_add`` returns.
    """
    if not context:
        with oscar.context(base_dir, oscar.min_free_blocks) as own_ctx:
            return _add(base_dir, name, own_ctx)
    return _add(base_dir, name, context)
Esempio n. 26
0
def truncate(base_dir_or_context, truncate_fulltext = False, truncate_log = False):
    """Call ``_truncate`` on an open context or on the database of a base directory.

    Anything that ``groonga.is_context`` does not recognise is treated as a
    base directory and opened for the duration of the call.  Returns
    whatever ``_truncate`` returns.
    """
    if not groonga.is_context(base_dir_or_context):
        # Not a context, so assume it is a base directory.
        with oscar.context(base_dir_or_context) as ctx:
            return _truncate(ctx, truncate_fulltext, truncate_log)
    return _truncate(base_dir_or_context, truncate_fulltext, truncate_log)
Esempio n. 27
0
def add_by_real_path(file):
    """Add the entry for a file given by its real filesystem path.

    The base directory is located with ``oscar.discover_basedir`` and the
    part of ``file`` that follows it is passed to ``add`` as the name.

    :param file: real path of the file to add
    """
    base_dir = oscar.discover_basedir(file)
    with oscar.context(base_dir, oscar.min_free_blocks) as context:
        # Take what comes AFTER the base directory prefix; the old slice
        # ``file[:len(base_dir)]`` returned the prefix itself.
        # NOTE(review): the remainder keeps any leading "/" - confirm that
        # ``add`` expects the name in that form.
        add(base_dir, file[len(base_dir):], context)
Esempio n. 28
0
def walk(base_dir, context = None):
    """Call ``_walk`` for ``base_dir``, opening a context when none is supplied.

    A truthy ``context`` is reused as-is; otherwise one is opened with
    ``oscar.min_free_blocks`` for the duration of the walk.
    """
    if not context:
        with oscar.context(base_dir, oscar.min_free_blocks) as own_ctx:
            _walk(base_dir, own_ctx)
        return
    _walk(base_dir, context)
Esempio n. 29
0
def search(base_dir_or_context, path, query=None, offset=None, limit=None, dirty=None):
    """Call ``_search`` on an open context or on the database of a base directory.

    Anything that ``groonga.is_context`` does not recognise is treated as a
    base directory and opened for the duration of the call.  All other
    arguments are forwarded unchanged; returns whatever ``_search`` returns.
    """
    if not groonga.is_context(base_dir_or_context):
        with oscar.context(base_dir_or_context) as ctx:
            return _search(ctx, path, query, offset, limit, dirty)
    return _search(base_dir_or_context, path, query, offset, limit, dirty)
Esempio n. 30
0
def init(base_dir):
    """Open a context for ``base_dir`` and run ``create_table`` on it.

    NOTE(review): the positional ``True`` is presumably a "create" flag of
    ``oscar.context`` - confirm against its signature.
    """
    with oscar.context(base_dir, True) as ctx:
        create_table(ctx)
Esempio n. 31
0
def update(base_dir, context = None, concurrency = 1, limit = 1000):
    """Call ``_update`` for ``base_dir``, opening a context when none is supplied.

    A truthy ``context`` is reused as-is; otherwise one is opened with
    ``oscar.min_free_blocks`` for the duration of the call.  ``concurrency``
    and ``limit`` are forwarded unchanged.
    """
    if not context:
        with oscar.context(base_dir, oscar.min_free_blocks) as own_ctx:
            _update(base_dir, own_ctx, concurrency, limit)
        return
    _update(base_dir, context, concurrency, limit)
Esempio n. 32
0
def run(args):
    """Add every file listed in ``args.args`` to the database at ``args.base_dir``.

    All files share one context; ``args.utf8_check`` is forwarded to
    ``add_file``.  An info message is logged once the context is closed.
    """
    base_dir = args.base_dir
    with oscar.context(base_dir) as ctx:
        for file_name in args.args:
            add_file(ctx, base_dir, file_name, args.utf8_check)

    oscar.log.info("Files added.")
Esempio n. 33
0
def perform_search():
    """Run a fixed sample query against the module-level ``base_dir``.

    Searches the empty path for the hard-coded keyword with offset 0 and
    limit 10, and returns the result of ``search.search``.
    """
    with oscar.context(base_dir) as ctx:
        hits = search.search(ctx, "", "公募", 0, 10)
    return hits
Esempio n. 34
0
def put_all(base_dir, configs):
    """Load every item of ``configs`` into the ``Config`` table.

    :param base_dir: base directory used to open the database context
    :param configs: mapping of configuration key to value; each value is
        serialised with ``oscar.to_json`` before being stored
    """
    with oscar.context(base_dir, oscar.min_free_blocks) as context:
        # A comprehension replaces ``lambda (x, y): ...``: tuple parameters
        # are Python 2-only syntax.
        rows = [{"_key": key, "value": oscar.to_json(value)}
                for key, value in configs.items()]
        groonga.load(context, "Config", rows)
Esempio n. 35
0
def show_one(base_dir, config_name):
    """Print whatever ``groonga.get`` returns for ``config_name`` in ``Config``."""
    with oscar.context(base_dir) as context:
        # Parenthesised single-argument print behaves the same on Python 2
        # and is also valid Python 3.
        print(groonga.get(context, "Config", config_name))
Esempio n. 36
0
def set_one(base_dir, config_name, value):
    """Store ``value`` under the key ``config_name`` in the ``Config`` table.

    :param base_dir: base directory used to open the database context
    :param config_name: key of the configuration entry
    :param value: value to store; it is written as given
        (NOTE(review): sibling writers serialise values with
        ``oscar.to_json`` - confirm whether callers already pass JSON text)
    """
    with oscar.context(base_dir, oscar.min_free_blocks) as context:
        # Use the arguments themselves; the previous code stored the
        # literal strings "config_name" and "value" on every call.
        groonga.load(context, "Config", {"_key": config_name, "value": value})
Esempio n. 37
0
def delete_by_real_path(file):
    """Delete the entry for a file given by its real filesystem path.

    The base directory is located with ``oscar.discover_basedir`` and the
    part of ``file`` that follows it is passed to ``delete`` as the name.

    :param file: real path of the file to delete
    :returns: whatever ``delete`` returns
    """
    base_dir = oscar.discover_basedir(file)
    with oscar.context(base_dir, oscar.min_free_blocks) as context:
        # Take what comes AFTER the base directory prefix; the old slice
        # ``file[:len(base_dir)]`` returned the prefix itself.
        # NOTE(review): the remainder keeps any leading "/" - confirm that
        # ``delete`` expects the name in that form.
        return delete(base_dir, file[len(base_dir):], context)
Esempio n. 38
0
def run(args):
    """Run ``cleanup`` on every directory in ``args.base_dir``.

    :returns: the result of ``cleanup`` for the first directory (``None``
        when the list is empty), which is what callers received before
    """
    # The previous code returned from inside the loop, so only the first
    # base directory was ever cleaned up.
    results = []
    for base_dir in args.base_dir:
        with oscar.context(base_dir) as context:
            results.append(cleanup(context, base_dir))
    # NOTE(review): the first result is kept as the return value for
    # backward compatibility - confirm whether callers want an aggregate.
    return results[0] if results else None