Beispiel #1
0
Datei: api.py Projekt: mhaya/weko
    def get_custom_sort(cls, index_id, sort_type):
        """Build the sort clauses used for an index's custom item order.

        :param index_id: Index whose stored item sort mapping
            (``Indexes.get_item_sort``) is handed to the script as ``factor``.
        :param sort_type: ``"asc"`` for ascending; any other value is
            handled as descending.
        :return: Tuple ``(script_sort, default_sort)``. ``script_sort`` is a
            ``_script`` sort clause; ``default_sort`` orders by ``_created``
            in the opposite direction.
        """
        ascending = sort_type == "asc"

        # Value the script yields when the control number has no non-zero
        # entry in ``factor``.
        fallback = "Integer.MAX_VALUE" if ascending else "0"
        lookup = "factor.get(doc[\"control_number\"].value)"
        source = lookup + "&&" + lookup + " !=0 ? " + lookup + ":" + fallback

        script_str = {
            "_script": {
                "script": source,
                "type": "number",
                "params": {"factor": Indexes.get_item_sort(index_id)},
                "order": "asc" if ascending else "desc",
            }
        }
        default_sort = {'_created': {'order': 'desc' if ascending else 'asc'}}
        return script_str, default_sort
Beispiel #2
0
    def save_sort(self):
        """Save the custom item sort order posted for an index.

        Reads a JSON body holding ``q_id`` (the index id) and ``sort`` (a
        list of entries carrying ``id`` and a ``custom_sort`` mapping),
        stores the per-item value via ``Indexes.set_item_sort_custom`` and
        then calls ``Indexes.update_item_sort_custom_es`` with the index
        path.

        :return: JSON response whose HTTP status is 200 on success and 405
            when any step raises.
        """
        try:
            data = request.get_json()
            index_id = data.get("q_id")
            sort_data = data.get("sort")

            # save data to DB
            item_sort = {
                sort.get('id'): sort.get('custom_sort').get(index_id)
                for sort in sort_data
            }
            Indexes.set_item_sort_custom(index_id, item_sort)

            # update es
            fp = Indexes.get_self_path(index_id)
            Indexes.update_item_sort_custom_es(fp.path, sort_data)

            jfy = {'status': 200, 'message': 'Data is successfully updated.'}
        except Exception:
            # Build the payload from scratch: the success payload is only
            # bound after every call that can fail, so it never exists here.
            jfy = {'status': 405, 'message': 'Error'}
        return make_response(jsonify(jfy), jfy['status'])
Beispiel #3
0
def get_permission_filter(comm_id=None):
    """Get permission filter.

    Builds the list of query objects that restrict a search to what the
    current user may see.

    :param comm_id: Optional index id. When truthy, the path restriction is
        limited to that index's own path, and only if that path is among
        ``Indexes.get_browsing_tree_paths()``.
    :return: List of ``Q`` objects. When ``search_permission.can()`` is
        true, the list is empty unless ``check_admin_user()`` reports a
        truthy result.
    """
    # check permission
    is_perm = search_permission.can()
    match = Q('match', publish_status='0')
    version = Q('match', relation_version_is_last='true')
    rng = Q('range', **{'publish_date': {'lte': 'now/d'}})
    is_perm_paths = Indexes.get_browsing_tree_paths()
    search_type = request.values.get('search_type')

    mst = [match, rng]
    if comm_id:
        term_list = []
        self_path = Indexes.get_self_path(comm_id)
        if self_path and self_path.path in is_perm_paths:
            term_list.append(self_path.path)

        full_text = \
            search_type == config.WEKO_SEARCH_TYPE_DICT['FULL_TEXT']
        if full_text and term_list:
            wildcard_path = Q("wildcard", path=term_list[0] + '*')
            terms = Q('bool', should=[wildcard_path])
        else:
            # Keyword/index search, or a full-text search whose index path
            # is unknown or not browsable: an empty terms list is used
            # instead of indexing into an empty ``term_list``.
            terms = Q('terms', path=term_list)
    else:
        terms = Q('terms', path=is_perm_paths)

    if is_perm:
        mut = []
        user_id, result = check_admin_user()
        if result:
            shuld = [
                Q('match', weko_creator_id=user_id),
                Q('match', weko_shared_id=user_id),
                Q('bool', must=mst),
            ]
            mut.append(Q('bool', should=shuld, must=[terms]))
            mut.append(Q('bool', must=version))
        return mut

    return mst + [terms, Q('bool', must=[match, version])]
Beispiel #4
0
    def get_custom_sort(cls, index_id, sort_type):
        """Build the painless-script sort clauses for an index's custom order.

        :param index_id: Index whose stored item sort mapping
            (``Indexes.get_item_sort``) is passed to the script as the
            ``factor`` parameter.
        :param sort_type: ``"asc"`` for ascending; any other value is
            handled as descending.
        :return: Tuple ``(script_sort, default_sort)``. ``default_sort``
            orders by ``_created`` in the opposite direction with
            ``unmapped_type`` set to ``long``.
        """
        ascending = sort_type == "asc"

        # Value the script yields when ``factor`` has no entry for the
        # document's control number.
        fallback = "Integer.MAX_VALUE" if ascending else "0"
        lookup = "params.factor.get(doc[\"control_number\"].value.toString())"
        source = "if(" + lookup + "!=null){" + lookup + "}else{" \
            + fallback + "}"

        script_str = {
            "_script": {
                "script": {
                    "source": source,
                    "lang": "painless",
                    "params": {"factor": Indexes.get_item_sort(index_id)},
                },
                "type": "number",
                "order": "asc" if ascending else "desc",
            }
        }
        default_sort = {
            '_created': {
                'order': 'desc' if ascending else 'asc',
                'unmapped_type': 'long',
            }
        }
        return script_str, default_sort
Beispiel #5
0
def get_permission_filter(comm_id=None):
    """Build the query objects that restrict a search by permission.

    :param comm_id: Optional id; when not ``None`` the paths returned by
        ``Indexes.get_all_path_list(comm_id)`` are used as the path
        restriction for permitted users.
    :return: List of ``Q`` objects. For users with search permission the
        list is empty unless ``check_admin_user()`` reports a truthy result.
    """
    # check permission
    is_perm = search_permission.can()
    published = Q('match', publish_status='0')
    released = Q('range', **{'publish_date': {'lte': 'now/d'}})

    community_must = []
    if comm_id is not None:
        path_queries = [
            Q('match', path=tree_path)
            for tree_path in Indexes.get_all_path_list(comm_id)
        ]
        community_must.append(Q('bool', should=path_queries))

    if not is_perm:
        return [published, released, get_index_filter()[0]]

    filters = []
    user_id, result = check_admin_user()
    if result:
        alternatives = [
            Q('match', weko_creator_id=user_id),
            Q('bool', must=[published, released]),
        ]
        if comm_id is not None:
            required = community_must
        else:
            required = get_index_filter()
        filters.append(Q('bool', should=alternatives, must=required))
    return filters
Beispiel #6
0
 def get_record_with_hps(cls, uuid):
     """Return the harvest public state together with the record.

     :param uuid: Record identifier passed to ``cls.get_record`` as ``id_``.
     :return: Tuple ``(harvest_public_state, record)``. The state is
         ``True`` when the record has no index path; otherwise it is the
         value of ``Indexes.get_harvest_public_state`` for the path list.
     """
     record = cls.get_record(id_=uuid)
     # A record without a 'path' entry is treated as having no index paths
     # instead of failing while copying ``None``.
     path = list(record.get('path') or [])
     harvest_public_state = True
     if path:
         harvest_public_state = Indexes.get_harvest_public_state(path)
     return harvest_public_state, record
Beispiel #7
0
    def index(self):
        """Bulk delete items and index trees, or render the management view.

        On a ``PUT`` request the ``q`` value names the index tree whose
        items are deleted; with ``recursively=true`` the items of every
        descendant tree are deleted too and the direct child trees are
        removed. Any other request, and a ``PUT`` for a tree that cannot be
        found, renders the item management template.
        """
        if request.method == 'PUT':
            tree_id = request.values.get('q')
            if tree_id is None or not tree_id.isdigit():
                return jsonify({'status': 0, 'msg': 'Invalid tree'})

            # Do delete items inside the current index tree (maybe root tree)
            current_tree = Indexes.get_index(tree_id)
            recursive_tree = Indexes.get_recursive_tree(tree_id)

            if current_tree is not None:
                # Delete items in current_tree
                delete_records(current_tree.id)

                # If recursively, then delete all child index trees
                # and theirs items
                recursively = request.values.get('recursively') == 'true'
                if recursively and recursive_tree is not None:
                    direct_child_trees = []
                    for node in recursive_tree:
                        if node[1] == current_tree.id:
                            continue
                        child_tree = Indexes.get_index(node[1])

                        # Do delete items in child_tree
                        delete_records(child_tree.id)

                        # Remember the level 1 children of current_tree
                        if node[0] == current_tree.id:
                            direct_child_trees.append(child_tree.id)

                    # Then delete each direct child tree and its children
                    for cid in direct_child_trees:
                        Indexes.delete(cid)

                return jsonify({'status': 1})

        # Render view.
        detail_condition = get_search_detail_keyword('')
        template = current_app.config[
            'WEKO_THEME_ADMIN_ITEM_MANAGEMENT_TEMPLATE']
        return self.render(
            template,
            management_type='delete',
            detail_condition=detail_condition
        )
Beispiel #8
0
Datei: api.py Projekt: mhaya/weko
 def get_record_cvs(cls, uuid):
     """Get the cover page state of a record.

     :param uuid: Record identifier passed to ``cls.get_record`` as ``id_``.
     :return: ``False`` when the record has no index path; otherwise the
         value of ``Indexes.get_coverpage_state`` for the path list.
     """
     record = cls.get_record(id_=uuid)
     # A record without a 'path' entry is treated as having no index paths
     # instead of failing while copying ``None``.
     path = list(record.get('path') or [])
     coverpage_state = False
     if path:
         coverpage_state = Indexes.get_coverpage_state(path)
     return coverpage_state
Beispiel #9
0
def get_index_filter():
    """Return the path filter for the browsable index trees.

    :return: One-element list holding a ``bool`` query whose ``should``
        clauses match each path from ``Indexes.get_browsing_tree_paths()``.
    """
    path_queries = [
        Q('match', path=tree_path)
        for tree_path in Indexes.get_browsing_tree_paths()
    ]
    return [Q('bool', should=path_queries)]
Beispiel #10
0
def get_permission_filter(comm_id=None):
    """Get permission filter.

    Builds the list of query objects that restrict a search to what the
    current user may see.

    :param comm_id: Optional index id. When not ``None`` the path
        restriction is limited to that index's own path, and only if the
        path is among ``Indexes.get_browsing_tree_paths()``.
    :return: List of ``Q`` objects. When ``search_permission.can()`` is
        true, the list is empty unless ``check_admin_user()`` reports a
        truthy result.
    """
    # check permission
    is_perm = search_permission.can()
    match = Q('match', publish_status='0')
    version = Q('match', relation_version_is_last='true')
    rng = Q('range', **{'publish_date': {'lte': 'now/d'}})
    is_perm_paths = Indexes.get_browsing_tree_paths()

    if comm_id is not None:
        term_list = []
        self_path = Indexes.get_self_path(comm_id)
        # ``get_self_path`` may yield nothing for an unknown id; an empty
        # terms list is then used instead of dereferencing ``None``.
        if self_path and self_path.path in is_perm_paths:
            term_list.append(self_path.path)
        terms = Q('terms', path=term_list)
    else:
        terms = Q('terms', path=is_perm_paths)

    mst = [match, rng]
    if is_perm:
        mut = []
        user_id, result = check_admin_user()
        if result:
            shuld = [
                Q('match', weko_creator_id=user_id),
                Q('match', weko_shared_id=user_id),
                Q('bool', must=mst),
            ]
            mut.append(Q('bool', should=shuld, must=[terms]))
            mut.append(Q('bool', must=version))
        return mut

    return mst + [terms, Q('bool', must=[match, version])]
Beispiel #11
0
    def convert_item_metadata(self, index_obj):
        """Convert the cached item metadata for this deposit.

        1. Convert Item Metadata
        2. Inject index tree id to dict
        3. Set Publish Status

        The raw item data is read from the Redis cache entry for this pid
        (the entry is deleted once read). The ``index`` list inside
        ``index_obj`` is rewritten in place to hold index paths.

        :param index_obj: Mapping with an ``index`` list and optional
            ``actions`` (defaults to ``'private'``).
        :return: dc
        :raises PIDResolveRESTError: when not every requested index resolves
            to a path.
        """
        # if this item has been deleted
        self.delete_es_index_attempt(self.pid)

        try:
            actions = index_obj.get('actions', 'private')
            datastore = RedisStore(redis.StrictRedis.from_url(
                current_app.config['CACHE_REDIS_URL']))
            cache_key = current_app.config[
                'WEKO_DEPOSIT_ITEMS_CACHE_PREFIX'].format(
                pid_value=self.pid.pid_value)

            data_str = datastore.get(cache_key)
            datastore.delete(cache_key)
            data = json.loads(data_str)
        except Exception:
            # Only ordinary errors become an HTTP 500; interpreter-exit
            # signals such as KeyboardInterrupt are left to propagate.
            abort(500, 'Failed to register item')

        # Get index path
        index_lst = index_obj.get('index', [])
        plst = Indexes.get_path_list(index_lst)

        if not plst or len(index_lst) != len(plst):
            raise PIDResolveRESTError(
                description='Any tree index has been deleted')

        # Replace the ids with their paths in the caller's list object.
        index_lst.clear()
        index_lst.extend(lst.path for lst in plst)

        # convert item meta data
        dc, jrc, is_edit = json_loader(data, self.pid)
        self.data = data
        self.jrc = jrc
        self.is_edit = is_edit

        # Save Index Path on ES
        jrc.update(dict(path=index_lst))
        dc.update(dict(path=index_lst))

        pubs = '1' if 'private' in actions else '0'
        ps = dict(publish_status=pubs)
        jrc.update(ps)
        dc.update(ps)

        return dc
Beispiel #12
0
    def save_sort(self):
        """Store the custom item sort order posted for an index.

        Reads a JSON body holding ``q_id`` (the index id) and ``sort`` (a
        list of entries carrying ``id`` and a ``custom_sort`` mapping) and
        saves every entry that has a truthy value for the index via
        ``Indexes.set_item_sort_custom``. Elasticsearch is not touched here.

        :return: JSON response whose HTTP status is 200 on success and 405
            when any step raises.
        """
        try:
            payload = request.get_json()
            index_id = payload.get("q_id")
            entries = payload.get("sort")

            # save data to DB
            item_sort = {}
            for entry in entries:
                position = entry.get('custom_sort').get(index_id)
                if not position:
                    continue
                item_sort[entry.get('id')] = position

            Indexes.set_item_sort_custom(index_id, item_sort)
        except Exception:
            jfy = {'status': 405, 'message': 'Error.'}
        else:
            jfy = {'status': 200, 'message': 'Data is successfully updated.'}
        return make_response(jsonify(jfy), jfy['status'])
Beispiel #13
0
def handle_replace_new_index() -> list:
    """Return the id of the "Index_import" index, creating it when absent.

    :argument
    :return
        return       -- one-element list with the index id, or an empty
                        list when the index could not be created or found.

    """
    from datetime import datetime

    index_name = "Index_import"
    now = datetime.now()

    found = Indexes.get_index_by_name(index_name)
    if found:
        return [found.id]

    # The id of the new index is the current timestamp in milliseconds.
    created = Indexes.create(
        pid=0,
        indexes={'id': int(now.timestamp() * 1000), 'value': index_name}
    )
    if created:
        found = Indexes.get_index_by_name(index_name)
        if found:
            return [found.id]
    return []
Beispiel #14
0
def handle_check_index(list_index: list) -> bool:
    """Check that every given index resolves to a path.

    :argument
        list_index     -- {list} list index id; an entry may also be a
                          '/'-separated path, in which case its last
                          segment is used as the id.
    :return
        return       -- true if ``Indexes.get_path_list`` yields one path
                        per id.

    """
    index_ids = []
    if list_index:
        index_ids = [str(entry).split('/')[-1] for entry in list_index]

    paths = Indexes.get_path_list(index_ids)
    return bool(paths) and len(index_ids) == len(paths)
Beispiel #15
0
def is_private_index(record):
    """Tell whether the record's index paths are not in the public state.

    :param record: Mapping whose ``path`` entry is checked.
    :return: ``True`` when ``Indexes.is_public_state`` is falsy for the
        paths, ``False`` otherwise.
    """
    # A deep copy is handed over so the callee works on its own list.
    index_paths = copy.deepcopy(record.get("path"))
    if Indexes.is_public_state(index_paths):
        return False
    return True
Beispiel #16
0
    def _get_index_search_query():
        """Build the search query body for the enclosing ``index_id``.

        For the root index the ``path.tree`` match is replaced by wildcard
        clauses over the child list (when there is one); for any other
        index the ``@index`` placeholder is replaced by the index's own
        path. In both cases the post filter is extended with a ``terms``
        clause over ``Indexes.get_list_path_publish(index_id)``.

        :return: The query body as a dict.
        """
        query_q = {
            "from": 0,
            "size": 10000,
            "_source": {"excludes": ["content", "_item_metadata"]},
            "query": {
                "bool": {
                    "must": [
                        {"match": {"path.tree": "@index"}},
                        {"match": {"relation_version_is_last": "true"}},
                    ]
                }
            },
            "post_filter": {
                "bool": {
                    "must": [
                        {"match": {"publish_status": "0"}},
                        {"range": {"publish_date": {"lte": "now/d"}}},
                    ]
                }
            },
        }

        q = str(index_id)
        root_id = str(current_app.config.get("WEKO_ROOT_INDEX",
                                             WEKO_ROOT_INDEX))
        is_root = q == root_id

        # Both the root and the non-root case restrict hits to these paths.
        list_path = Indexes.get_list_path_publish(index_id)
        query_q['post_filter']['bool']['must'].append(
            {"terms": {"path": list_path}})

        if is_root:
            child_list = Indexes.get_child_list(q)
            if child_list:
                wild_card = [
                    {"wildcard": {"path.tree": child.cid}}
                    for child in child_list
                ]
                query_q['query']['bool']['must'] = [
                    {"bool": {"should": wild_card}},
                    {"match": {"relation_version_is_last": "true"}},
                ]
            return query_q

        # create search query
        try:
            fp = Indexes.get_self_path(q)
            query_q = json.dumps(query_q).replace("@index", fp.path)
            query_q = json.loads(query_q)
        except BaseException:
            pass
        return query_q
Beispiel #17
0
    def get(self, **kwargs):
        """Search records.

        Runs the search for the requested page, applies any configured
        post-filter parameters found in the request, rewrites the ``path``
        aggregation buckets with index names, publication counts and RSS
        status, and annotates each hit with comment, custom sort, heading
        and page information.

        :returns: the search result containing hits and aggregations as
        returned by invenio-search.
        :raises MaxResultWindowRESTError: when ``page * size`` reaches
        ``self.max_result_window``.
        """
        page = request.values.get('page', 1, type=int)
        size = request.values.get('size', 20, type=int)
        community_id = request.values.get('community')

        # Collect the request values of every configured post-filter name.
        params = {}
        if current_app.config['RECORDS_REST_FACETS'] and \
            current_app.config['SEARCH_UI_SEARCH_INDEX'] and \
                'post_filters' in current_app.config[
            'RECORDS_REST_FACETS'
        ][current_app.config[
            'SEARCH_UI_SEARCH_INDEX'
        ]]:
            post_filters = current_app.config['RECORDS_REST_FACETS'][
                current_app.config['SEARCH_UI_SEARCH_INDEX']]['post_filters']

            for param in post_filters:
                value = request.args.getlist(param)
                if value:
                    params[param] = value

        if page * size >= self.max_result_window:
            raise MaxResultWindowRESTError()
        urlkwargs = dict()
        search_obj = self.search_class()
        search = search_obj.with_preference_param().params(version=True)
        # Slice the search to the requested page.
        search = search[(page - 1) * size:page * size]
        search, qs_kwargs = self.search_factory(self, search)

        query = request.values.get('q')
        if query:
            urlkwargs['q'] = query

        # Execute search

        # Each collected parameter becomes a terms post-filter on the field
        # named in WEKO_FACETED_SEARCH_MAPPING.
        for param in params:
            query_key = current_app.config['WEKO_FACETED_SEARCH_MAPPING'][
                param]
            search = search.post_filter({'terms': {query_key: params[param]}})

        search_result = search.execute()

        # Generate links for prev/next
        urlkwargs.update(
            size=size,
            _external=True,
        )
        # endpoint = '.{0}_index'.format(
        #     current_records_rest.default_endpoint_prefixes[self.pid_type])

        links = dict(self=url_for(
            'weko_search_rest.recid_index', page=page, **urlkwargs))
        if page > 1:
            links['prev'] = url_for('weko_search_rest.recid_index',
                                    page=page - 1,
                                    **urlkwargs)
        if size * page < search_result.hits.total and \
                size * page < self.max_result_window:
            links['next'] = url_for('weko_search_rest.recid_index',
                                    page=page + 1,
                                    **urlkwargs)
        # aggs result identify
        rd = search_result.to_dict()
        # '0' is used as the index id when the request carries no 'q'.
        q = request.values.get('q') or '0'
        lang = current_i18n.language

        # Best effort: any failure while listing paths yields no paths.
        try:
            paths = Indexes.get_self_list(q, community_id)
        except BaseException:
            paths = []
        agp = rd["aggregations"]["path"]["buckets"]
        nlst = []
        for p in paths:
            # m flags whether a bucket for this path was moved into nlst.
            m = 0
            for k in range(len(agp)):
                if p.path == agp[k].get("key"):
                    agp[k]["name"] = p.name if lang == "ja" else p.name_en
                    date_range = agp[k].pop("date_range")
                    no_available = agp[k].pop("no_available")
                    pub = dict()
                    bkt = date_range['available']['buckets']
                    if bkt:
                        # Buckets with a "to" bound count as published, the
                        # others as unpublished.
                        for d in bkt:
                            pub["pub_cnt" if d.
                                get("to") else "un_pub_cnt"] = d.get(
                                    "doc_count")
                        # NOTE(review): raises KeyError when no bucket
                        # without "to" was seen above -- confirm both
                        # buckets are always present.
                        pub["un_pub_cnt"] += no_available['doc_count']
                        agp[k]["date_range"] = pub
                        comment = p.comment
                        # NOTE(review): the trailing comma stores a 1-tuple
                        # here; it is overwritten by result["comment"] below.
                        agp[k]["comment"] = comment,
                        # Popping during the indexed loop is only safe
                        # because of the break that follows.
                        result = agp.pop(k)
                        result["comment"] = comment
                        nlst.append(result)
                        m = 1
                    break
            if m == 0:
                # No usable bucket: emit a zero-count entry for the path,
                # using the last path segment as the index id.
                index_id = p.path if '/' not in p.path \
                    else p.path.split('/').pop()
                index_info = Indexes.get_index(index_id=index_id)
                rss_status = index_info.rss_status
                nd = {
                    'doc_count': 0,
                    'key': p.path,
                    'name': p.name if lang == "ja" else p.name_en,
                    'date_range': {
                        'pub_cnt': 0,
                        'un_pub_cnt': 0
                    },
                    'rss_status': rss_status,
                    'comment': p.comment,
                }
                nlst.append(nd)
        agp.clear()
        # process index tree image info
        if len(nlst):
            index_id = nlst[0].get('key')
            index_id = index_id if '/' not in index_id \
                else index_id.split('/').pop()
            index_info = Indexes.get_index(index_id=index_id)
            # update by weko_dev17 at 2019/04/04
            if len(index_info.image_name) > 0:
                nlst[0]['img'] = index_info.image_name
            nlst[0]['display_format'] = index_info.display_format
            nlst[0]['rss_status'] = index_info.rss_status
        # Update rss_status for index child
        for idx in range(0, len(nlst)):
            index_id = nlst[idx].get('key')
            index_id = index_id if '/' not in index_id \
                else index_id.split('/').pop()
            index_info = Indexes.get_index(index_id=index_id)
            nlst[idx]['rss_status'] = index_info.rss_status
        # The rebuilt list is appended as a single element, so the buckets
        # value becomes a list containing one list.
        agp.append(nlst)
        for hit in rd['hits']['hits']:
            try:
                # Register comment
                _comment = list()
                _comment.append(hit['_source']['title'][0])
                hit['_source']['_comment'] = _comment
                # Register custom_sort
                # NOTE(review): index_info is whatever the loops above
                # assigned last; when nlst is empty it may be unbound and
                # the resulting error is swallowed by the except below.
                cn = hit['_source']['control_number']
                if index_info.item_custom_sort.get(cn):
                    hit['_source']['custom_sort'] = {
                        str(index_info.id):
                        str(index_info.item_custom_sort.get(cn))
                    }
            except Exception:
                pass

        # add info (headings & page info)
        try:
            # Item types already looked up, keyed by item type id.
            item_type_list = {}
            for hit in rd['hits']['hits']:
                # get item type schema
                item_type_id = \
                    hit['_source']['_item_metadata']['item_type_id']
                if item_type_id in item_type_list:
                    item_type = copy.deepcopy(item_type_list[item_type_id])
                else:
                    item_type = ItemType.query.filter_by(
                        id=item_type_id).first()
                    item_type_list[item_type_id] = copy.deepcopy(item_type)
                # heading
                heading = get_heading_info(hit, lang, item_type)
                hit['_source']['heading'] = heading
                # page info
                if 'pageStart' not in hit['_source']:
                    hit['_source']['pageStart'] = []
                if 'pageEnd' not in hit['_source']:
                    hit['_source']['pageEnd'] = []
        except Exception as ex:
            # A failure stops the annotation of the remaining hits but the
            # response is still returned.
            current_app.logger.error(ex)
        return self.make_response(
            pid_fetcher=self.pid_fetcher,
            search_result=rd,
            links=links,
            item_links_factory=self.links_factory,
        )
Beispiel #18
0
Datei: api.py Projekt: mhaya/weko
    def create(cls, journals=None):
        """
        Create a journal row from the given information.

        :param journals: the journal information (dictionary). It must hold
            a truthy ``id`` and an ``index_id`` that ``Indexes.get_index``
            resolves.
        :returns: ``True`` when the journal was stored, ``False`` when
            storing raised (the session is rolled back), ``None`` when the
            input was rejected.
        """
        if not isinstance(journals, dict):
            return

        # Fields copied verbatim from ``journals`` into the new row.
        copied_fields = (
            'publication_title',
            'print_identifier',
            'online_identifier',
            'date_first_issue_online',
            'num_first_vol_online',
            'num_first_issue_online',
            'date_last_issue_online',
            'num_last_vol_online',
            'num_last_issue_online',
            'embargo_info',
            'coverage_depth',
            'coverage_notes',
            'publisher_name',
            'publication_type',
            'parent_publication_title_id',
            'preceding_publication_title_id',
            'access_type',
            'language',
            'title_alternative',
            'title_transcription',
            'ncid',
            'ndl_callno',
            'ndl_bibid',
            'jstage_code',
            'ichushi_code',
            'is_output',
        )

        is_ok = True
        try:
            cid = journals.get('id')
            if not cid:
                return

            # check index id.
            index_id = journals.get('index_id')
            if not index_id:
                return
            if not Indexes.get_index(index_id=index_id, with_count=True):
                return

            data = {"id": cid, "index_id": index_id}
            data.update(
                (name, journals.get(name)) for name in copied_fields)

            # get current user logged id.
            data["owner_user_id"] = current_user.get_id()

            with db.session.begin_nested():
                db.session.add(Journal(**data))
            db.session.commit()
        except IntegrityError as ie:
            is_ok = False
            current_app.logger.debug(ie)
        except Exception as ex:
            is_ok = False
            current_app.logger.debug(ex)
        finally:
            if not is_ok:
                db.session.rollback()
        return is_ok
Beispiel #19
0
Datei: api.py Projekt: mhaya/weko
 def navi(self):
     """Return the path name for this object's index paths.

     A missing ``path`` entry is treated as an empty list.
     """
     index_paths = self.get('path', [])
     return Indexes.get_path_name(index_paths)
Beispiel #20
0
Datei: api.py Projekt: mhaya/weko
    def convert_item_metadata(self, index_obj, data=None):
        """Convert item metadata.

        1. Convert Item Metadata
        2. Inject index tree id to dict
        3. Set Publish Status

        When ``data`` is not given, the raw item data is read from the Redis
        cache entry for this pid (the entry is deleted once read).

        :param index_obj: Mapping with an ``index`` list (ids or
            '/'-separated paths) and optional ``actions`` (defaults to
            ``'private'``).
        :param data: Optional item data; falsy values trigger the cache
            lookup.
        :return: dc
        :raises PIDResolveRESTError: when not every requested index resolves
            to a path.
        """
        # if this item has been deleted
        self.delete_es_index_attempt(self.pid)

        try:
            actions = index_obj.get('actions', 'private')
            if not data:
                datastore = RedisStore(
                    redis.StrictRedis.from_url(
                        current_app.config['CACHE_REDIS_URL']))
                cache_key = current_app.config[
                    'WEKO_DEPOSIT_ITEMS_CACHE_PREFIX'].format(
                        pid_value=self.pid.pid_value)

                data_str = datastore.get(cache_key)
                datastore.delete(cache_key)
                data = json.loads(data_str.decode('utf-8'))
        except Exception:
            # Only ordinary errors become an HTTP 500; interpreter-exit
            # signals such as KeyboardInterrupt are left to propagate.
            abort(500, 'Failed to register item')

        # Get index path
        index_lst = index_obj.get('index', [])
        # Prepare index id list if the current index_lst is a path list:
        # the id is the last '/'-separated segment of each entry.
        if index_lst:
            index_lst = [str(index).split('/')[-1] for index in index_lst]

        plst = Indexes.get_path_list(index_lst)

        if not plst or len(index_lst) != len(plst):
            raise PIDResolveRESTError(
                description='Any tree index has been deleted')

        index_lst = [lst.path for lst in plst]

        # convert item meta data
        dc, jrc, is_edit = json_loader(data, self.pid)
        self.data = data
        self.jrc = jrc
        self.is_edit = is_edit

        # Save Index Path on ES
        jrc.update(dict(path=index_lst))
        # add at 20181121 start
        # es setting: one empty custom_sort slot per path, keyed by the
        # last 13 characters of the path.
        sub_sort = {pth[-13:]: "" for pth in index_lst}
        jrc.update(dict(custom_sort=sub_sort))
        dc.update(dict(custom_sort=sub_sort))
        dc.update(dict(path=index_lst))

        pubs = '1' if 'private' in actions else '0'
        ps = dict(publish_status=pubs)
        jrc.update(ps)
        dc.update(ps)
        return dc
Beispiel #21
0
    def _get_index_earch_query():
        """Build the index search query with per-path aggregations.

        The body carries ``@index`` and ``@count`` placeholders that are
        substituted in the serialized JSON: ``@index`` with the path of the
        requested index (when it resolves) and ``@count`` with
        ``Indexes.get_index_count()``.

        :return: The query body as a dict.
        """
        date_range_agg = {
            "filter": {"match": {"publish_status": "0"}},
            "aggs": {
                "available": {
                    "range": {
                        "field": "publish_date",
                        "ranges": [
                            {"from": "now+1d/d"},
                            {"to": "now+1d/d"},
                        ]
                    },
                }
            }
        }
        no_available_agg = {
            "filter": {
                "bool": {
                    "must_not": [{"match": {"publish_status": "0"}}]
                }
            }
        }
        path_sub_aggs = {
            "date_range": date_range_agg,
            "no_available": no_available_agg,
        }
        # add item type aggs
        path_sub_aggs.update(get_item_type_aggs(search._index[0]))

        query_q = {
            "_source": {"excludes": ['content']},
            "query": {
                "bool": {
                    "must": [
                        {"match": {"path.tree": "@index"}},
                        {"match": {"relation_version_is_last": "true"}},
                    ]
                }
            },
            "aggs": {
                "path": {
                    "terms": {
                        "field": "path.tree",
                        "include": "@index|@index/[^/]+",
                        "size": "@count"
                    },
                    "aggs": path_sub_aggs
                }
            },
            "post_filter": {}
        }

        q = request.values.get('q') if index_id is None else index_id
        mut = get_permission_filter(q) if q else get_permission_filter()
        if mut:
            mut = [clause.to_dict() for clause in mut]
            first_bool = mut[0].get('bool')
            # A leading bool clause is used as the post filter on its own;
            # otherwise all clauses are combined under ``must``.
            if first_bool:
                query_q['post_filter']['bool'] = first_bool
            else:
                query_q['post_filter']['bool'] = {'must': mut}

        # create search query
        if q:
            try:
                fp = Indexes.get_self_path(q)
                if fp:
                    query_q = json.dumps(query_q).replace("@index", fp.path)
                    query_q = json.loads(query_q)
            except BaseException:
                pass

        query_q = json.dumps(query_q).replace(
            "@count", str(Indexes.get_index_count()))
        return json.loads(query_q)
Beispiel #22
0
def build_rss_xml(data=None, index_id=0, page=1, count=20, term=0, lang=''):
    """Build RSS data as XML format.

    Arguments:
        data {list} -- Search result entries; each entry is handed to
            ``find_rss_value`` to extract the individual item fields.
            Anything that is empty or not a list yields a channel
            without items.
        index_id {int} -- Index whose comment/name is used as the channel
            description; a falsy value (or an unknown index) falls back to
            the site name.
        page {int} -- 1-based page of ``data`` to render.
        count {int} -- Number of items per page.
        term {int} -- The term (only echoed into the channel URL here).
        lang {str} -- Language code written to the root element and the
            channel URL.

    Returns:
        xml response -- RSS data as XML

    """
    root_url = str(request.url_root).replace('/api/', '/')
    root = Et.Element('rdf:RDF')
    root.set('xmlns', config.WEKO_XMLNS)
    root.set('xmlns:rdf', config.WEKO_XMLNS_RDF)
    root.set('xmlns:rdfs', config.WEKO_XMLNS_RDFS)
    root.set('xmlns:dc', config.WEKO_XMLNS_DC)
    root.set('xmlns:prism', config.WEKO_XMLNS_PRISM)
    root.set('xmlns:lang', lang)

    # First layer
    requested_url = root_url + 'rss/records?index_id=' + str(index_id) + \
        '&page=' + str(page) + '&term=' + str(term) + \
        '&count=' + str(count) + '&lang=' + str(lang)
    channel = Et.SubElement(root, 'channel')
    channel.set('rdf:about', requested_url)

    # Channel layer
    Et.SubElement(channel, 'title').text = 'WEKO3'
    Et.SubElement(channel, 'link').text = requested_url
    # The lookup may come back empty for an unknown index id; fall back to
    # the site name instead of failing on attribute access.
    index_detail = Indexes.get_index(index_id) if index_id else None
    if index_detail is not None:
        description = index_detail.comment \
            or index_detail.index_name \
            or index_detail.index_name_english
    else:
        description = theme_config.THEME_SITENAME
    Et.SubElement(channel, 'description').text = description
    # NOTE(review): a naive local timestamp is labelled '+00:00' here;
    # confirm the server clock is expected to run in UTC.
    current_time = datetime.now()
    Et.SubElement(channel,
                  'dc:date').text = current_time.isoformat() + '+00:00'
    items_element = Et.SubElement(channel, 'items')
    seq = Et.SubElement(items_element, 'rdf:Seq')
    if not data or not isinstance(data, list):
        # NOTE(review): this branch answers with 'text/xml' while the
        # populated branch below uses 'application/xml' - confirm intended.
        xml_str = tostring(root, encoding='utf-8')
        xml_str = str.encode(config.WEKO_XML_FORMAT) + xml_str
        return Response(xml_str, mimetype='text/xml')

    # Positions of ``data`` that belong to the requested page.
    page_window = range((page - 1) * count, page * count)
    # (element tag, key passed to find_rss_value) for the plain text fields,
    # in the order they are written after the creator element(s).
    simple_fields = (
        ('dc:publisher', 'publisher'),
        ('prism:publicationName', 'sourceTitle'),
        ('prism:issn', 'issn'),
        ('prism:volume', 'volume'),
        ('prism:number', 'issue'),
        ('prism:startingPage', 'pageStart'),
        ('prism:endingPage', 'pageEnd'),
        ('prism:publicationDate', 'date'),
        ('description', 'description'),
        ('dc:date', '_updated'),
    )

    # add item layer
    for item_idx, data_item in enumerate(data):
        if item_idx not in page_window:
            continue
        link = find_rss_value(data_item, 'link')
        item = Et.Element('item')
        item.set('rdf:about', link)
        Et.SubElement(item, 'title').text = find_rss_value(data_item, 'title')
        Et.SubElement(item, 'link').text = link
        see_also = Et.SubElement(item, 'rdfs:seeAlso')
        see_also.set('rdf:resource', find_rss_value(data_item, 'seeAlso'))

        # A list of creators becomes one dc:creator element per entry.
        creators = find_rss_value(data_item, 'creator')
        if isinstance(creators, list):
            for creator in creators:
                Et.SubElement(item, 'dc:creator').text = creator
        else:
            Et.SubElement(item, 'dc:creator').text = creators

        for tag, key in simple_fields:
            Et.SubElement(item, tag).text = find_rss_value(data_item, key)

        # Each item is referenced from the channel's rdf:Seq and appended
        # to the document root.
        li = Et.SubElement(seq, 'rdf:li')
        li.set('rdf:resource', link)
        root.append(item)

    xml_str = tostring(root, encoding='utf-8')
    xml_str = str.encode(config.WEKO_XML_FORMAT) + xml_str
    response = current_app.response_class()
    response.data = xml_str
    response.headers['Content-Type'] = 'application/xml'
    return response
Beispiel #23
0
def get_records(**kwargs):
    """Get records paginated.

    Keyword arguments read here:
        resumptionToken {dict} -- may carry ``page`` and ``scroll_id``; when
            a ``scroll_id`` is present the existing scroll is continued
            instead of building a new search.
        set -- matched against ``_oai.sets``.
        from_ / until -- lower / upper bound applied to ``_updated``.

    Returns:
        Pagination -- thin wrapper around the raw search response dict.
    """
    def index_ids_has_future_date():
        """Return ids of public, harvest-public indexes not yet published."""
        query = Index.query.filter(
            Index.public_state.is_(True),
            Index.public_date > datetime.now(),
            Index.harvest_public_state.is_(True)
        )
        return [index.id for index in query.all() or []]

    def get_records_has_doi():
        """Return the RecordMetadata rows that own a 'doi' identifier."""
        # Get object_uuid of PersistentIdentifier
        query = PersistentIdentifier.query.filter(
            PersistentIdentifier.pid_type == 'doi'
        )
        object_uuids = [pid.object_uuid for pid in query.all() or []]
        # Get RecordMetadata
        query = RecordMetadata.query.filter(
            RecordMetadata.id.in_(object_uuids)
        )
        return query.all() or []

    def add_condition_doi_and_future_date(query):
        """Exclude DOI records that sit in a future-dated index.

        ``post_filter`` hands back a new search object, so the updated
        query is returned and the caller has to keep using the result.
        """
        index_ids = set(index_ids_has_future_date())
        if not index_ids:
            # Nothing can match; skip loading every DOI record.
            return query
        for record in get_records_has_doi():
            paths = record.json.get('path', [])
            # NOTE(review): ``path`` entries are compared directly with
            # Index.id values - confirm both sides use the same type.
            if any(path in index_ids for path in paths):
                query = query.post_filter(
                    'bool',
                    **{'must_not': [
                        {'term': {'_id': str(record.id)}}]})
        return query

    from weko_index_tree.api import Indexes
    page_ = kwargs.get('resumptionToken', {}).get('page', 1)
    size_ = current_app.config['OAISERVER_PAGE_SIZE']
    scroll = current_app.config['OAISERVER_RESUMPTION_TOKEN_EXPIRE_TIME']
    scroll_id = kwargs.get('resumptionToken', {}).get('scroll_id')

    if not scroll_id:
        search = OAIServerSearch(
            index=current_app.config['INDEXER_DEFAULT_INDEX'],
        ).params(
            scroll='{0}s'.format(scroll),
        ).extra(
            version='true',
        ).sort(
            {'control_number': {'order': 'asc'}}
        )[(page_ - 1) * size_:page_ * size_]

        if 'set' in kwargs:
            search = search.query('match', **{'_oai.sets': kwargs['set']})

        time_range = {}
        if 'from_' in kwargs:
            time_range['gte'] = kwargs['from_']
        if 'until' in kwargs:
            time_range['lte'] = kwargs['until']
        if time_range:
            search = search.filter('range', **{'_updated': time_range})

        search = search.query('match', **{'relation_version_is_last': 'true'})
        index_paths = Indexes.get_harverted_index_list()
        query_filter = [
            # script get deleted items.
            {"bool": {"must_not": {"exists": {"field": "path"}}}}
        ]
        for index_path in index_paths:
            query_filter.append({
                "wildcard": {
                    "path": index_path
                }
            })
        search = search.query(
            'bool', **{'must': [{'bool': {'should': query_filter}}]})
        # Keep the returned search: the exclusion is lost otherwise.
        search = add_condition_doi_and_future_date(search)
        response = search.execute().to_dict()
    else:
        response = current_search_client.scroll(
            scroll_id=scroll_id,
            scroll='{0}s'.format(scroll),
        )

    class Pagination(object):
        """Dummy pagination class."""

        page = page_
        per_page = size_

        def __init__(self, response):
            """Initilize pagination."""
            self.response = response
            self.total = response['hits']['total']
            self._scroll_id = response.get('_scroll_id')

            # clean descriptor on last page
            if not self.has_next:
                current_search_client.clear_scroll(
                    scroll_id=self._scroll_id
                )
                self._scroll_id = None

        @cached_property
        def has_next(self):
            """Return True if there is next page."""
            # NOTE(review): ``<=`` also reports a next page when total is an
            # exact multiple of per_page - confirm this is intended.
            return self.page * self.per_page <= self.total

        @cached_property
        def next_num(self):
            """Return next page number."""
            return self.page + 1 if self.has_next else None

        @property
        def items(self):
            """Return iterator."""
            from datetime import datetime
            for result in self.response['hits']['hits']:
                # Hits without an '_oai' section are skipped.
                if '_oai' in result['_source']:
                    yield {
                        'id': result['_id'],
                        'json': result,
                        # Only the first 19 characters
                        # (YYYY-MM-DDTHH:MM:SS) are parsed.
                        'updated': datetime.strptime(
                            result['_source']['_updated'][:19],
                            '%Y-%m-%dT%H:%M:%S'
                        ),
                    }

    return Pagination(response)
Beispiel #24
0
def search():
    """Index Search page ui.

    Renders the workflow activity detail template when the request carries
    an ``item_link`` argument, otherwise the configured search template.
    """
    search_type = request.args.get('search_type',
                                   WEKO_SEARCH_TYPE_DICT['FULL_TEXT'])
    get_args = request.args
    community_id = ""
    ctx = {'community': None}
    # Any search_type other than full-text / keyword is kept as the
    # current index id.
    cur_index_id = search_type if search_type not in \
        (WEKO_SEARCH_TYPE_DICT['FULL_TEXT'], WEKO_SEARCH_TYPE_DICT[
            'KEYWORD'], ) else None
    if 'community' in get_args:
        from weko_workflow.api import GetCommunity
        comm = GetCommunity.get_community_by_id(request.args.get('community'))
        ctx = {'community': comm}
        # Guard against an unknown community id instead of failing on
        # attribute access.
        if comm is not None:
            community_id = comm.id

    # Get the design for widget rendering
    page, render_widgets = get_design_layout(
        community_id or current_app.config['WEKO_THEME_DEFAULT_COMMUNITY'])

    # Get index style
    style = IndexStyle.get(
        current_app.config['WEKO_INDEX_TREE_STYLE_OPTIONS']['id'])
    width = style.width if style else '3'

    # add at 1206 for search management
    sort_options, display_number = SearchSetting.get_results_setting()
    ts = time.time()
    disply_setting = dict(size=display_number, timestamp=ts)

    detail_condition = get_search_detail_keyword('')

    export_settings = AdminSettings.get('item_export_settings') or \
        AdminSettings.Dict2Obj(
            current_app.config['WEKO_ADMIN_DEFAULT_ITEM_EXPORT_SETTINGS'])

    height = style.height if style else None
    if 'item_link' in get_args:
        from weko_workflow.api import WorkActivity

        activity_id = request.args.get('item_link')
        workflow_activity = WorkActivity()
        # community_id and ctx are replaced by the activity's own values.
        activity_detail, item, steps, action_id, cur_step, temporary_comment,\
            approval_record, step_item_login_url, histories, res_check, pid, \
            community_id, ctx = workflow_activity.get_activity_index_search(
                activity_id=activity_id)

        # Get ex-Item Links
        recid = item['pid'].get('value') if item.get('pid') else None
        if recid:
            # Drop everything after the first '.' of the record id.
            pid_without_ver = recid.split('.')[0]
            item_link = ItemLink.get_item_link_info(pid_without_ver)
            ctx['item_link'] = item_link

        return render_template(
            'weko_workflow/activity_detail.html',
            page=page,
            render_widgets=render_widgets,
            activity=activity_detail,
            item=item,
            steps=steps,
            action_id=action_id,
            cur_step=cur_step,
            temporary_comment=temporary_comment,
            record=approval_record,
            step_item_login_url=step_item_login_url,
            histories=histories,
            res_check=res_check,
            pid=pid,
            index_id=cur_index_id,
            community_id=community_id,
            width=width,
            height=height,
            allow_item_exporting=export_settings.allow_item_exporting,
            is_permission=check_permission(),
            is_login=bool(current_user.get_id()),
            **ctx)

    journal_info = None
    index_display_format = '1'
    check_site_license_permission()
    send_info = dict()
    send_info['site_license_flag'] = hasattr(current_user,
                                             'site_license_flag')
    send_info['site_license_name'] = getattr(current_user,
                                             'site_license_name', '')
    if search_type in WEKO_SEARCH_TYPE_DICT.values():
        searched.send(current_app._get_current_object(),
                      search_args=get_args,
                      info=send_info)
        if search_type == WEKO_SEARCH_TYPE_DICT['INDEX']:
            cur_index_id = request.args.get('q', '0')
            journal_info = get_journal_info(cur_index_id)
            index_info = Indexes.get_index(cur_index_id)
            if index_info:
                index_display_format = index_info.display_format
                if index_display_format == '2':
                    # Display format '2' overrides the page size with 100.
                    disply_setting = dict(size=100, timestamp=ts)

    if hasattr(current_i18n, 'language'):
        index_link_list = get_index_link_list(current_i18n.language)
    else:
        index_link_list = get_index_link_list()
    return render_template(
        current_app.config['SEARCH_UI_SEARCH_TEMPLATE'],
        page=page,
        render_widgets=render_widgets,
        index_id=cur_index_id,
        community_id=community_id,
        sort_option=sort_options,
        disply_setting=disply_setting,
        detail_condition=detail_condition,
        width=width,
        height=height,
        # ``style`` may be missing (see width/height above); treat that as
        # "index links disabled" rather than raising.
        index_link_enabled=style.index_link_enabled if style else False,
        index_link_list=index_link_list,
        journal_info=journal_info,
        index_display_format=index_display_format,
        allow_item_exporting=export_settings.allow_item_exporting,
        is_permission=check_permission(),
        is_login=bool(current_user.get_id()),
        **ctx)
Beispiel #25
0
def get_child_list(index_id=0):
    """Return the child id list of ``index_id`` as a JSON response."""
    child_ids = Indexes.get_child_id_list(index_id)
    return jsonify(child_ids)
Beispiel #26
0
    def index(self):
        """Render the item management page of the index search ui.

        Requests without an ``item_management`` argument are answered
        with ``abort(500)``.
        """
        query_args = request.args
        search_type = query_args.get('search_type', '0')
        community_id = ""
        ctx = {'community': None}
        # '0' and '1' are search modes; anything else is kept as index id.
        cur_index_id = None if search_type in ('0', '1') else search_type
        if 'community' in query_args:
            from weko_workflow.api import GetCommunity
            comm = GetCommunity.get_community_by_id(
                query_args.get('community'))
            ctx = {'community': comm}
            community_id = comm.id

        # Get index style
        style = IndexStyle.get(
            current_app.config['WEKO_INDEX_TREE_STYLE_OPTIONS']['id'])
        if style:
            width = style.width
        else:
            width = '3'

        detail_condition = get_search_detail_keyword('')

        if style:
            height = style.height
        else:
            height = None

        if 'item_management' not in query_args:
            return abort(500)

        management_type = query_args.get('item_management', 'sort')
        has_items = False
        has_child_trees = False
        if management_type == 'delete':
            # Does this tree has items or children?
            q = query_args.get('q')
            if q is not None and q.isdigit():
                current_tree = Indexes.get_index(q)
                recursive_tree = Indexes.get_recursive_tree(q)

                if current_tree is not None:
                    has_items = len(get_tree_items(current_tree.id)) > 0
                    if recursive_tree is not None:
                        # More than one entry counts as having children.
                        has_child_trees = len(recursive_tree) > 1

        template = current_app.config[
            'WEKO_THEME_ADMIN_ITEM_MANAGEMENT_TEMPLATE']
        bulk_fields = current_app.config['WEKO_RECORDS_UI_BULK_UPDATE_FIELDS']
        return self.render(
            template,
            index_id=cur_index_id,
            community_id=community_id,
            width=width,
            height=height,
            management_type=management_type,
            fields=bulk_fields['fields'],
            licences=bulk_fields['licences'],
            has_items=has_items,
            has_child_trees=has_child_trees,
            detail_condition=detail_condition,
            **ctx)
Beispiel #27
0
 def get_index(self):
     """Return the Index looked up by ``self.index_id``, or None if unset."""
     return Indexes.get_index(self.index_id) if self.index_id else None
Beispiel #28
0
    def get(self, **kwargs):
        """Search records.

        Reads ``page``, ``size`` and ``q`` from the request. When ``q`` is
        given, the ``path`` aggregation buckets of the result are rebuilt
        so that every path returned by ``Indexes.get_self_list(q)`` has an
        entry carrying a display name and published / unpublished counts.

        :returns: the search result containing hits and aggregations as
        returned by invenio-search.
        :raises MaxResultWindowRESTError: when ``page * size`` reaches
        ``self.max_result_window``.
        """
        page = request.values.get('page', 1, type=int)
        size = request.values.get('size', 20, type=int)
        if page * size >= self.max_result_window:
            raise MaxResultWindowRESTError()

        urlkwargs = dict()
        search_obj = self.search_class()
        search = search_obj.with_preference_param().params(version=True)
        search = search[(page - 1) * size:page * size]

        search, qs_kwargs = self.search_factory(self, search)
        urlkwargs.update(qs_kwargs)

        # Execute search
        search_result = search.execute()

        # Generate links for prev/next
        urlkwargs.update(
            size=size,
            _external=True,
        )

        links = dict(self=url_for(
            'weko_search_rest.recid_index', page=page, **urlkwargs))
        if page > 1:
            links['prev'] = url_for('weko_search_rest.recid_index',
                                    page=page - 1,
                                    **urlkwargs)
        if size * page < search_result.hits.total and \
                size * page < self.max_result_window:
            links['next'] = url_for('weko_search_rest.recid_index',
                                    page=page + 1,
                                    **urlkwargs)

        # aggs result identify
        rd = search_result.to_dict()
        q = request.values.get('q')
        lang = current_i18n.language

        if q:
            try:
                paths = Indexes.get_self_list(q)
            except Exception:
                # Best effort: a failed lookup leaves the buckets empty.
                # Only Exception is caught so that KeyboardInterrupt and
                # SystemExit still propagate.
                paths = []
            agp = rd["aggregations"]["path"]["buckets"]
            nlst = []

            for p in paths:
                name = p.name if lang == "ja" else p.name_en
                matched = False
                for k, bucket in enumerate(agp):
                    if p.path != bucket.get("key"):
                        continue
                    bucket["name"] = name
                    date_range = bucket.pop("date_range")
                    no_available = bucket.pop("no_available")
                    bkt = date_range['available']['buckets']
                    if bkt:
                        # Start both counters at 0 so a missing range
                        # bucket cannot raise KeyError below.
                        pub = {'pub_cnt': 0, 'un_pub_cnt': 0}
                        for d in bkt:
                            # A range bucket with a "to" bound counts as
                            # published, the other one as unpublished.
                            pub["pub_cnt" if d.
                                get("to") else "un_pub_cnt"] = d.get(
                                    "doc_count")
                        pub["un_pub_cnt"] += no_available['doc_count']
                        bucket["date_range"] = pub
                        # Safe to pop while enumerating: the loop is left
                        # right after.
                        nlst.append(agp.pop(k))
                        matched = True
                    break
                if not matched:
                    # No usable bucket for this path: emit a zeroed entry.
                    nlst.append({
                        'doc_count': 0,
                        'key': p.path,
                        'name': name,
                        'date_range': {
                            'pub_cnt': 0,
                            'un_pub_cnt': 0
                        }
                    })
            agp.clear()
            # process index tree image info
            if nlst:
                # The index id is the last segment of the path key.
                index_id = nlst[0].get('key').split('/')[-1]
                index_info = Indexes.get_index(index_id=index_id)
                # Guard against a missing index and an unset image name.
                if index_info is not None \
                        and index_info.display_format == '2' \
                        and index_info.image_name:
                    nlst[0]['img'] = index_info.image_name
            # NOTE(review): the whole list is appended as ONE element, so
            # buckets becomes [[...]]; kept as-is because consumers may
            # rely on that shape - confirm before flattening.
            agp.append(nlst)
        current_app.logger.debug(rd)
        return self.make_response(
            pid_fetcher=self.pid_fetcher,
            search_result=rd,
            links=links,
            item_links_factory=self.links_factory,
        )
Beispiel #29
0
 def get_index(self):
     """Return the Index looked up by ``self.repository_id``, else None."""
     if not self.repository_id:
         return None
     return Indexes.get_index(self.repository_id)