def submit_edit_set(spec):
    """Insert a new set."""
    form = get_NewSetForm(request.form)
    if request.method == 'POST' and form.validate():
        old_set = Set.query.filter(spec=spec)
        query = Query(old_set.search_pattern)
        old_recid = current_search_client.search(
            index="records",
            doc_type="record",
            body=query.body,
            fields="_id, oaiid"
        )
        query = Query(form.search_pattern)
        new_recid = current_search_client.search(
            index="records",
            doc_type="record",
            body=query.body,
            fields="_id, oaiid"
        )
        recids_to_delete = set(old_recid)-set(new_recid)
        # TODO: marks records as deleted from set
        remove_recids_from_set(recids_to_delete)
        add_records_to_set(new_recid)
        flash('Set was changed')
        return redirect(url_for('.manage_sets'))
    return render_template('edit_set.html', edit_set_form=form, spec=spec)
Example #2
def push_data_keywords(pub_ids=None, index=None):
    """ Go through all the publications and their datatables and move data
     keywords from tables to their parent publications. """

    if not pub_ids:
        body = {"query": {"match_all": {}}}
        results = es.search(index=index, doc_type=CFG_PUB_TYPE, body=body, _source=False)
        pub_ids = [i["_id"] for i in results["hits"]["hits"]]

    for pub_id in pub_ids:
        query_builder = QueryBuilder()
        query_builder.add_child_parent_relation(
            "publication", relation="parent", must=True, related_query={"match": {"recid": pub_id}}
        )
        tables = es.search(index=index, doc_type=CFG_DATA_TYPE, body=query_builder.query, _source_include="keywords")
        keywords = [d["_source"].get("keywords", None) for d in tables["hits"]["hits"]]

        # Flatten the list
        keywords = [i for inner in keywords for i in inner]

        # Aggregate
        agg_keywords = defaultdict(list)
        for kw in keywords:
            agg_keywords[kw["name"]].append(kw["value"])

        # Remove duplicates
        for k, v in agg_keywords.items():
            agg_keywords[k] = list(set(v))

        body = {"doc": {"data_keywords": dict(agg_keywords)}}

        try:
            es.update(index=index, doc_type=CFG_PUB_TYPE, id=pub_id, body=body)
        except Exception as e:
            log.error(e)
Example #3
def match(record, config=None):
    """Given a record, yield the records in INSPIRE most similar to it.

    This method can be used to detect if a record that we are ingesting as a
    submission or as a harvest is already present in the system, or to find
    out which record a reference should be pointing to.
    """
    if config is None:
        current_app.logger.debug(
            'No configuration provided. Falling back to the default configuration.'
        )
        config = current_app.config['MATCHER_DEFAULT_CONFIGURATION']

    try:
        algorithm, doc_type, index = config['algorithm'], config[
            'doc_type'], config['index']
    except KeyError as e:
        raise KeyError('Malformed configuration: %s.' % repr(e))

    source = config.get('source', [])

    for i, step in enumerate(algorithm):
        try:
            queries = step['queries']
        except KeyError:
            raise KeyError('Malformed algorithm: step %d has no queries.' % i)

        validator = _get_validator(step.get('validator'))

        for j, query in enumerate(queries):
            try:
                body = compile(query, record)
            except Exception as e:
                raise ValueError(
                    'Malformed query. Query %d of step %d does not compile: %s.'
                    % (j, i, repr(e)))

            if not body:
                continue

            if source:
                result = es.search(index=index,
                                   doc_type=doc_type,
                                   body=body,
                                   _source=source)
            else:
                result = es.search(index=index, doc_type=doc_type, body=body)

            for hit in result['hits']['hits']:
                if validator(record, hit):
                    yield hit
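A minimal usage sketch for match(), assuming a hypothetical configuration shaped the way the function parses it (a list of algorithm steps, each with queries and an optional validator, plus doc_type and index); the query template, validator dotted path, and record dict are illustrative placeholders, not taken from any real configuration:

# Hypothetical configuration: the keys mirror what match() reads above;
# the query format and validator path are illustrative only.
config = {
    'algorithm': [
        {
            'queries': [
                # assumed query format understood by compile(); placeholder values
                {'type': 'exact', 'path': 'arxiv_eprints.value', 'search_path': 'arxiv_eprints.value.raw'},
            ],
            'validator': 'my_project.validators:default_validator',  # hypothetical path
        },
    ],
    'doc_type': 'hep',
    'index': 'records-hep',
}

for hit in match(record, config=config):  # `record` is some ingested record dict
    print(hit['_source'].get('control_number'))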
def test_index_institutions_record(base_app, es_clear, db, datadir,
                                   create_record):
    data = json.loads((datadir / "902725.json").read_text())
    record = create_record("ins", data=data)

    expected_count = 1
    expected_metadata = deepcopy(record)
    expected_metadata["affiliation_suggest"] = {
        "input": [
            "CERN, Geneva",
            "CERN",
            "European Organization for Nuclear Research",
            "CERN",
            "Centre Européen de Recherches Nucléaires",
            "01631",
            "1211",
        ]
    }
    expected_metadata["_created"] = utils.isoformat(record.created)
    expected_metadata["_updated"] = utils.isoformat(record.updated)

    response = es.search("records-institutions")

    assert response["hits"]["total"] == expected_count
    assert response["hits"]["hits"][0]["_source"] == expected_metadata
Example #5
def test_index_record_manually(app, celery_app_with_context,
                               celery_session_worker, retry_until_matched):
    data = faker.record("lit")
    rec = LiteratureRecord.create(data)
    models_committed.disconnect(index_after_commit)
    db.session.commit()
    models_committed.connect(index_after_commit)
    es.indices.refresh("records-hep")
    result = es.search("records-hep")
    assert result["hits"]["total"] == 0

    rec.index()
    steps = [
        {
            "step": es.indices.refresh,
            "args": ["records-hep"]
        },
        {
            "step": es.search,
            "args": ["records-hep"],
            "expected_result": {
                "expected_key": "hits.total",
                "expected_result": 1
            },
        },
    ]
    retry_until_matched(steps)
Example #6
    def get_record_acls(clz, record: Record) -> Iterable['ACL']:
        """
        Returns a list of ACL objects applicable for the given record.

        :param record: Invenio record
        :return:
        """
        # run a percolate query against the ACL index matching the record's index

        query = clz._get_percolate_query(record)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('get_record_acls: query %s', json.dumps(query, indent=4, ensure_ascii=False))
        index, _doc_type = current_record_to_index(record)
        try:
            for r in current_search_client.search(
                index=clz.get_acl_index_name(index),
                **add_doc_type(current_app.config['INVENIO_EXPLICIT_ACLS_DOCTYPE_NAME']),
                body=query
            )['hits']['hits']:
                yield clz.query.get(r['_id'])
        except elasticsearch.TransportError as e:
            logger.error('Error running ACL query on index %s, doctype %s, query %s',
                         clz.get_acl_index_name(index), current_app.config['INVENIO_EXPLICIT_ACLS_DOCTYPE_NAME'],
                         query)
            if e.status_code == 404:
                raise RuntimeError('Explicit ACLs were not prepared for the given schema. '
                                   'Please run invenio explicit-acls prepare %s' % record.get('$schema', ''))
            else:  # pragma: no cover
                raise
def test_index_literature_record(es_clear, db, datadir, create_record):

    author_data = json.loads((datadir / "1032336.json").read_text())
    author = create_record("aut", data=author_data)

    data = json.loads((datadir / "1630825.json").read_text())
    record = create_record("lit", data=data)

    expected_count = 1
    expected_metadata = json.loads((datadir / "es_1630825.json").read_text())
    expected_metadata_ui_display = json.loads(
        expected_metadata.pop("_ui_display"))
    expected_facet_author_name = expected_metadata.pop("facet_author_name")
    expected_metadata.pop("authors")

    response = es.search("records-hep")

    result = response["hits"]["hits"][0]["_source"]
    result_ui_display = json.loads(result.pop("_ui_display"))
    result_authors = result.pop("authors")
    result_facet_author_name = result.pop("facet_author_name")
    del result["_created"]
    del result["_updated"]
    assert response["hits"]["total"] == expected_count
    assert result == expected_metadata
    assert result_ui_display == expected_metadata_ui_display
    assert len(record.get("authors")) == len(result_facet_author_name)
    assert sorted(result_facet_author_name) == sorted(
        expected_facet_author_name)
def load_records(app, filename, schema, tries=5):
    """Try to index records."""
    indexer = RecordIndexer()
    records = []
    with app.app_context():
        with mock.patch('invenio_records.api.Record.validate',
                        return_value=None):
            data_filename = pkg_resources.resource_filename(
                'invenio_records', filename)
            records_data = load(data_filename)
            with db.session.begin_nested():
                for item in records_data:
                    record_id = uuid.uuid4()
                    item_dict = dict(marc21.do(item))
                    item_dict['$schema'] = schema
                    recid_minter(record_id, item_dict)
                    oaiid_minter(record_id, item_dict)
                    record = Record.create(item_dict, id_=record_id)
                    indexer.index(record)
                    records.append(record.id)
            db.session.commit()

        # Wait for indexer to finish
        for i in range(tries):
            response = current_search_client.search()
            if response['hits']['total'] >= len(records):
                break
            current_search.flush_and_refresh('_all')

    return records
Example #9
def get_n_latest_records(n_latest, field="last_updated", index=None):
    """ Gets latest N records from the index """

    query = {"size": n_latest, "query": QueryBuilder.generate_query_string(), "sort": [{field: {"order": "desc"}}]}

    query_result = es.search(index=index, doc_type=CFG_PUB_TYPE, body=query)
    return query_result["hits"]["hits"]
Example #10
def api():
    """Search API for search UI demo.

    .. note::

        WARNING! This search API is for demo purposes only.

    """
    page = request.values.get('page', 1, type=int)
    size = request.values.get('size', 1, type=int)
    query = Query(request.values.get('q', ''))[(page-1)*size:page*size]
    # dummy facets
    query.body["aggs"] = {
        "by_body": {
            "terms": {
                "field": "summary.summary"
            }
        },
        "by_title": {
            "terms": {
                "field": "title_statement.title"
            }
        }
    }
    response = current_search_client.search(
        index=request.values.get('index', 'records'),
        doc_type=request.values.get('type'),
        body=query.body,
    )
    return jsonify(**response)
def load_records(app, filename, schema, tries=5):
    """Try to index records."""
    indexer = RecordIndexer()
    records = []
    with app.app_context():
        with mock.patch("invenio_records.api.Record.validate",
                        return_value=None):
            data_filename = pkg_resources.resource_filename(
                "invenio_records", filename)
            records_data = load(data_filename)
            with db.session.begin_nested():
                for item in records_data:
                    record_id = uuid.uuid4()
                    item_dict = dict(marc21.do(item))
                    item_dict["$schema"] = schema
                    recid_minter(record_id, item_dict)
                    oaiid_minter(record_id, item_dict)
                    record = current_oaiserver.record_cls.create(item_dict,
                                                                 id_=record_id)
                    indexer.index(record)
                    records.append(record.id)
            db.session.commit()

        # Wait for indexer to finish
        for i in range(tries):
            response = current_search_client.search()
            if response["hits"]["total"] >= len(records):
                break
            current_search.flush_and_refresh("_all")

    return records
def submit_set():
    """Insert a new set."""
    form = get_NewSetForm(request.form)
    if request.method == 'POST' and form.validate():
        new_set = Set(spec=form.spec.data,
                      name=form.name.data,
                      description=form.description.data,
                      search_pattern=form.search_pattern.data,
                      #collection=form.collection.data,
                      parent=form.parent.data)
        db.session.add(new_set)

        # this should be moved to UPDATER (celery task) and it should always take care of adding records to sets.
        ##########
        query = Query(form.query.data)
        response = current_search_client.search(
            index="records",# make configurable PER SET
            doc_type="record",# make configurable PER SET
            body=query.body,
            fields="_id, oaiid" #path to oaiid as a configurable
        )
        ids = [(a['_id'], a['oaiid']) for a in response['hits']['hits']]
        add_records_to_set(ids)
        #########

        db.session.commit()
        flash('New set was added.')
        return redirect(url_for('.manage_sets'))
    return render_template('make_set.html', new_set_form=form)
Example #13
def delete_halted_workflows_for_doi(doi):
    """
    Delete all workflows that contain the given DOI and are in HALTED state.

    The workflow index is only updated when a WorkflowObjectModel instance is saved. When a workflow is halted,
    the connected object's status isn't changed, so the index isn't updated. Because of this, we cannot
    filter for the HALTED state in Elasticsearch.
    """

    current_search_client.indices.refresh("scoap3-workflows-harvesting")
    search_result = current_search_client.search(
        index='scoap3-workflows-harvesting',
        q='metadata.dois.value:"%s"' % doi)

    workflow_ids = {
        x['_source']['_workflow']['id_workflow']
        for x in search_result['hits']['hits']
    }
    for wid in workflow_ids:
        if wid:
            w = Workflow.query.get(wid)
            if w and w.status == WorkflowStatus.HALTED:
                db.session.delete(w)

    db.session.commit()
Example #14
def search_export(es_dict):
    """
    Exports basic record data for all filtered records.

    :param es_dict: defines the ElasticSearch data in order to filter the records.
    """

    fields = current_app.config.get('SEARCH_EXPORT_FIELDS')
    source_fields = [field for _, field, _ in fields]

    size = current_app.config.get('TOOL_ELASTICSEARCH_PAGE_SIZE', 100)
    search_index = current_app.config.get('SEARCH_UI_SEARCH_INDEX')

    result_data = []
    index = 0
    total_hits = None
    while total_hits is None or index < total_hits:
        # query ElasticSearch for result
        search_results = current_search_client.search(body=es_dict,
                                                      index=search_index,
                                                      _source=source_fields,
                                                      size=size,
                                                      from_=index)
        total_hits = search_results['hits']['total']['value']
        index += len(search_results['hits']['hits'])

        # extract and add data to result list
        for hit in search_results['hits']['hits']:
            record = hit['_source']
            result_data.append(
                [get_value(record, key, '') for _, _, key in fields])

    return {'header': [name for name, _, _ in fields], 'data': result_data}
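One caveat: from_/size paging as above is capped by the index's max_result_window setting (10,000 by default), so very large exports may fail. A rough alternative sketch using the elasticsearch.helpers.scan scroll helper, assuming the same client and config values are available; search_export_scan is a hypothetical name:

from elasticsearch.helpers import scan

def search_export_scan(es_dict):
    """Sketch: same export via the scroll API, which has no paging depth cap."""
    fields = current_app.config.get('SEARCH_EXPORT_FIELDS')
    source_fields = [field for _, field, _ in fields]
    search_index = current_app.config.get('SEARCH_UI_SEARCH_INDEX')

    # scan() transparently scrolls through all matching documents
    result_data = [
        [get_value(hit['_source'], key, '') for _, _, key in fields]
        for hit in scan(current_search_client, query=es_dict,
                        index=search_index, _source=source_fields)
    ]
    return {'header': [name for name, _, _ in fields], 'data': result_data}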
Example #15
def load_records(app, filename, schema, tries=5):
    """Try to index records."""
    indexer = RecordIndexer()
    records = []
    with app.app_context():
        with mock.patch('invenio_records.api.Record.validate',
                        return_value=None):
            data_filename = pkg_resources.resource_filename(
                'invenio_records', filename)
            records_data = load(data_filename)
            with db.session.begin_nested():
                for item in records_data:
                    record_id = uuid.uuid4()
                    item_dict = dict(marc21.do(item))
                    item_dict['$schema'] = schema
                    recid_minter(record_id, item_dict)
                    oaiid_minter(record_id, item_dict)
                    record = Record.create(item_dict, id_=record_id)
                    indexer.index(record)
                    records.append(record.id)
            db.session.commit()

        # Wait for indexer to finish
        for i in range(tries):
            response = current_search_client.search()
            if response['hits']['total'] >= len(records):
                break
            sleep(5)

    return records
def already_pending_in_holdingpen_validator(property_name, value):
    """Check if there's a submission in the holdingpen with the same arXiv ID.
    """
    if property_name == 'arXiv ID':
        query_should = {
            'metadata.arxiv_eprints.value.raw': value,
        }
    elif property_name == 'DOI':
        query_should = {
            'metadata.dois.value.raw': value,
        }

    query = {
        "query": {
            "bool": {
                "filter": [
                    {
                        "term": {
                            "metadata.acquisition_source.source": "submitter"
                        },
                    },
                    {
                        "bool": {
                            "must_not": {
                                "term": {
                                    "_workflow.status": "COMPLETED"
                                }
                            }
                        }
                    }
                ],
                "must": [
                    {
                        "term": query_should,
                    }
                ]
            }
        },
        "_source": {
            "includes": [
                "_id"
            ]
        }
    }

    hits = es.search(
        index='holdingpen-hep',
        doc_type='hep',
        body=query,
    )['hits']['hits']

    matches = dedupe_list(hits)
    holdingpen_ids = [int(el['_id']) for el in matches]

    if holdingpen_ids:
        raise ValidationError(
            'There exists already a pending suggestion with the same %s '
            '"%s", it will be attended to shortly.'
            % (property_name, value)
        )
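A hedged usage sketch: the validator is called with the human-readable property name and the submitted value, and raises ValidationError when a matching non-completed 'submitter' workflow already exists in the holdingpen (the identifier below is made up):

# Illustrative call; the arXiv identifier is a made-up value.
already_pending_in_holdingpen_validator('arXiv ID', '1234.56789')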
Example #18
    def validate_record_selector(self, form, field):
        """Check that the record selector is valid and usable to query the Elasticsearch index."""
        schemas = form.schemas.data
        record_selector = field.data
        if not record_selector:
            raise StopValidation(
                'Record selector must not be empty. If you want to match all resources, use {"match_all": {}}')
        try:
            for index in schemas:
                current_search_client.search(
                    index=index, size=0, body={
                        'query': record_selector
                    }
                )
        except Exception as e:
            raise StopValidation(str(e))
Example #19
def submit_set():
    """Insert a new set."""
    form = get_NewSetForm(request.form)
    if request.method == 'POST' and form.validate():
        new_set = OAISet(spec=form.spec.data,
                         name=form.name.data,
                         description=form.description.data,
                         search_pattern=form.search_pattern.data,
                         parent=form.parent.data)
        db.session.add(new_set)

        # this should be moved to UPDATER (celery task) and it should always
        # take care of adding records to sets.
        ##########
        query = Query(form.query.data)
        response = current_search_client.search(
            index='records',  # make configurable PER SET
            doc_type='record',  # make configurable PER SET
            body=query.body,
            fields='_id, oaiid'  # path to oaiid as a configurable
        )
        ids = [(a['_id'], a['oaiid']) for a in response['hits']['hits']]
        add_records_to_set(ids)
        #########

        db.session.commit()
        flash(_('New set %(spec)s was added.', spec=new_set.spec))
        return redirect(url_for('.manage_sets'))
    return render_template('make_set.html', new_set_form=form)
Example #20
def _percolate_query(index, doc_type, percolator_doc_type, document):
    """Get results for a percolate query."""
    if ES_VERSION[0] in (2, 5):
        results = current_search_client.percolate(index=index,
                                                  doc_type=doc_type,
                                                  allow_no_indices=True,
                                                  ignore_unavailable=True,
                                                  body={'doc': document})
        return results['matches']
    elif ES_VERSION[0] == 6:
        results = current_search_client.search(index=index,
                                               doc_type=percolator_doc_type,
                                               allow_no_indices=True,
                                               ignore_unavailable=True,
                                               body={
                                                   'query': {
                                                       'percolate': {
                                                           'field': 'query',
                                                           'document_type':
                                                           percolator_doc_type,
                                                           'document':
                                                           document,
                                                       }
                                                   }
                                               })
        return results['hits']['hits']
Example #21
def _percolate_query(index, doc_type, percolator_doc_type, document):
    """Get results for a percolate query."""
    index = _build_percolator_index_name(index)
    if ES_VERSION[0] in (2, 5):
        results = current_search_client.percolate(index=index,
                                                  doc_type=doc_type,
                                                  allow_no_indices=True,
                                                  ignore_unavailable=True,
                                                  body={'doc': document})
        return results['matches']
    elif ES_VERSION[0] in (6, 7):
        es_client_params = dict(index=index,
                                doc_type=percolator_doc_type,
                                allow_no_indices=True,
                                ignore_unavailable=True,
                                body={
                                    'query': {
                                        'percolate': {
                                            'field': 'query',
                                            'document_type':
                                            percolator_doc_type,
                                            'document': document,
                                        }
                                    }
                                })
        if ES_VERSION[0] == 7:
            es_client_params.pop('doc_type')
        results = current_search_client.search(**es_client_params)
        return results['hits']['hits']
Example #22
def get_records_matching_field(field, id, index=None, doc_type=None, source=None):
    """ Checks if a record with a given ID exists in the index """

    query = {
        "size": 9999,
        'query': {
            "bool": {
                "must": [
                    {
                        "match": {
                            field: id
                        }
                    }
                ]
            }
        }
    }

    if doc_type:
        query["query"]["bool"]["must"].append({
            "match": {
                "doc_type": doc_type
            }
        })

    if source:
        query["_source"] = source

    return es.search(index=index, body=query)
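A usage sketch under assumed values (the field name, identifier, and index below are illustrative):

# Illustrative call: fetch up to 9999 records whose 'recid' field matches 12345.
result = get_records_matching_field('recid', 12345, index='hepdata-main')
matching_hits = result['hits']['hits']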
Example #23
def push_data_keywords(pub_ids=None, index=None):
    """ Go through all the publications and their datatables and move data
     keywords from tables to their parent publications. """

    if not pub_ids:
        body = {'query': {'match_all': {}}}
        results = es.search(index=index,
                            doc_type=CFG_PUB_TYPE,
                            body=body,
                            _source=False)
        pub_ids = [i['_id'] for i in results['hits']['hits']]

    for pub_id in pub_ids:
        query_builder = QueryBuilder()
        query_builder.add_child_parent_relation(
            'publication',
            relation='parent',
            must=True,
            related_query={'match': {
                'recid': pub_id
            }})
        tables = es.search(index=index,
                           doc_type=CFG_DATA_TYPE,
                           body=query_builder.query,
                           _source_include='keywords')
        keywords = [
            d['_source'].get('keywords', []) for d in tables['hits']['hits']
        ]

        # Flatten the list
        keywords = [i for inner in keywords for i in inner]

        # Aggregate
        agg_keywords = defaultdict(list)
        for kw in keywords:
            agg_keywords[kw['name']].append(kw['value'])

        # Remove duplicates
        for k, v in agg_keywords.items():
            agg_keywords[k] = list(set(v))

        body = {"doc": {'data_keywords': dict(agg_keywords)}}

        try:
            es.update(index=index, doc_type=CFG_PUB_TYPE, id=pub_id, body=body)
        except Exception as e:
            log.error(e)
Example #24
    def references(self):
        """Reference export for single record in datatables format.

        :returns: list
            List of lists where every item represents a datatables row.
            A row consists of [reference_number, reference, num_citations]
        """

        out = []
        references = self.record.get('references')
        if references:
            refs_to_get_from_es = [
                ref['recid'] for ref in references if ref.get('recid')
            ]
            query = IQ(' OR '.join('recid:' + str(ref)
                                   for ref in refs_to_get_from_es))
            records_from_es = current_search_client.search(
                index='records-hep',
                doc_type='hep',
                body={"query": query.to_dict()},
                size=9999,
                _source=[
                    'control_number',
                    'citation_count',
                    'titles',
                    'earliest_date',
                    'authors',
                    'collaboration',
                    'corporate_author',
                    'publication_info'
                ]
            )['hits']['hits']

            refs_from_es = {
                str(ref['_source']['control_number']): ref['_source'] for ref in records_from_es
            }
            for reference in references:
                row = []
                recid = reference.get('recid')
                ref_record = refs_from_es.get(str(recid)) if recid else None

                if recid and ref_record:
                    ref_record = Record(ref_record)
                    if ref_record:
                        row.append(render_template_to_string(
                            "inspirehep_theme/references.html",
                            record=ref_record,
                            reference=reference
                        ))
                        row.append(ref_record.get('citation_count', ''))
                        out.append(row)
                else:
                    row.append(render_template_to_string(
                        "inspirehep_theme/references.html",
                        reference=reference))
                    row.append('')
                    out.append(row)

        return out
def index():
    """Query Elasticsearch using Invenio query syntax."""
    page = request.values.get("page", 1, type=int)
    size = request.values.get("size", 1, type=int)
    query = Query(request.values.get("q", ""))[(page - 1) * size : page * size]
    response = current_search_client.search(
        index=request.values.get("index", "demo"), doc_type=request.values.get("type"), body=query.body
    )
    return jsonify(**response)
Example #26
def search_authors(name, size=20):
    """ Search for authors in the author index. """
    from hepdata.config import CFG_ES_AUTHORS

    index, doc_type = CFG_ES_AUTHORS

    query = {"size": size, "query": {"match": {"full_name": {"query": name, "fuzziness": "AUTO"}}}}

    results = es.search(index=index, doc_type=doc_type, body=query)
    return [x["_source"] for x in results["hits"]["hits"]]
Example #27
def index(index_name='demo'):
    page = request.values.get('page', 1, type=int)
    size = request.values.get('size', 1, type=int)
    query = Query(request.values.get('q', ''))[(page-1)*size:page*size]
    response = current_search_client.search(
        index=index_name,
        doc_type=request.values.get('type', 'example'),
        body=query.body,
    )
    return jsonify(**response)
def test_regression_index_literature_record_with_related_records(
        es_clear, db, datadir, create_record):
    data = json.loads((datadir / "1503270.json").read_text())
    record = create_record("lit", data=data)

    response = es.search("records-hep")

    result = response["hits"]["hits"][0]["_source"]

    assert data["related_records"] == result["related_records"]
Example #29
def test_cli_delete_edit_article_workflows(app_cli_runner):
    wf_to_be_deleted = build_workflow({}, data_type='hep')
    wf_to_be_deleted.save()
    start('edit_article', object_id=wf_to_be_deleted.id)
    wf_to_be_deleted = workflow_object_class.get(wf_to_be_deleted.id)
    wf_to_be_deleted.status = ObjectStatus.WAITING
    wf_to_be_deleted.created = datetime.datetime(2020, 7, 8, 12, 31, 8, 299777)
    wf_to_be_deleted.save()

    wf_in_error = build_workflow({}, data_type='hep')
    wf_in_error.status = ObjectStatus.ERROR
    wf_in_error.extra_data["_error_msg"] = "Error in WebColl"
    wf_in_error.created = datetime.datetime(2020, 7, 8, 12, 31, 8, 299777)
    wf_in_error.save()

    recent_wf = build_workflow({}, data_type='hep')
    recent_wf.save()
    start('edit_article', object_id=recent_wf.id)
    recent_wf = workflow_object_class.get(recent_wf.id)
    recent_wf.status = ObjectStatus.WAITING
    recent_wf.created = datetime.datetime(2020, 7, 11, 12, 31, 8, 299777)
    recent_wf.save()

    indices = ['holdingpen-hep']
    es.indices.refresh(indices)
    es_result = es.search(indices)
    assert es_result['hits']['total']['value'] == 3

    wf_count = WorkflowObjectModel.query.count()
    assert wf_count == 3

    result = app_cli_runner.invoke(workflows,
                                   ['delete_edit_article_older_than'])

    assert "Found 1 workflows to delete older than 48 hours" in result.output_bytes
    es.indices.refresh(indices)
    es_result = es.search(indices)
    assert es_result['hits']['total']['value'] == 2

    wf_count = WorkflowObjectModel.query.count()
    assert wf_count == 2
    assert WorkflowObjectModel.query.filter_by(
        id=wf_to_be_deleted.id).one_or_none() is None
Example #30
    def get(self, **kwargs):
        """Search records.

        :returns: the search result containing hits and aggregations as
        returned by invenio-search.
        """
        page = request.values.get('page', 1, type=int)
        size = request.values.get('size', 10, type=int)
        if page*size >= self.max_result_window:
            raise MaxResultWindowRESTError()

        # Parse and slice query
        try:
            query = Query(request.values.get('q', ''))[(page-1)*size:page*size]
        except SyntaxError:
            raise InvalidQueryRESTError()

        # Arguments that must be added in prev/next links
        urlkwargs = dict()

        # Facets
        query, qs_kwargs = self.facets_factory(query, self.search_index)
        urlkwargs.update(qs_kwargs)

        # Sort
        query, qs_kwargs = self.sorter_factory(query, self.search_index)
        urlkwargs.update(qs_kwargs)

        # Execute search
        response = current_search_client.search(
            index=self.search_index,
            doc_type=self.search_type,
            body=query.body,
            version=True,
        )

        # Generate links for prev/next
        urlkwargs.update(
            size=size,
            q=request.values.get('q', ''),
            _external=True,
        )
        endpoint = 'invenio_records_rest.{0}_list'.format(self.pid_type)
        links = dict(self=url_for(endpoint, page=page, **urlkwargs))
        if page > 1:
            links['prev'] = url_for(endpoint, page=page-1, **urlkwargs)
        if size * page < int(response['hits']['total']) and \
                size * page < self.max_result_window:
            links['next'] = url_for(endpoint, page=page+1, **urlkwargs)

        return self.make_response(
            pid_fetcher=self.pid_fetcher,
            search_result=response,
            links=links,
        )
Example #31
def test_migrate_mirror_broken_migrates_invalid(app_cli_runner):
    index = 'holdingpen-hep'
    build_workflow({})

    wf_count = WorkflowObjectModel.query.count()
    assert wf_count == 1

    es.indices.refresh(index)
    es_result = es.search(index)
    assert es_result['hits']['total'] == 1

    result = app_cli_runner.invoke(workflows, ['purge', '--yes-i-know'])
    assert result.exit_code == 0

    wf_count = WorkflowObjectModel.query.count()
    assert wf_count == 0

    es.indices.refresh(index)
    es_result = es.search(index)
    assert es_result['hits']['total'] == 0
def index():
    """Query Elasticsearch using Invenio query syntax."""
    page = request.values.get('page', 1, type=int)
    size = request.values.get('size', 1, type=int)
    query = Query(request.values.get('q', ''))[(page - 1) * size:page * size]
    response = current_search_client.search(
        index=request.values.get('index', 'demo'),
        doc_type=request.values.get('type'),
        body=query.body,
    )
    return jsonify(**response)
Example #33
def perform_es_search(query_string, page, size, collection, sort=''):
    query, qs_kwargs = perform_query(query_string, page, size)
    search_result = current_search_client.search(
        index='records-{0}'.format(collection),
        doc_type=collection,
        sort=sort,
        body=query.body,
        version=True)

    results = [hit['_source'] for hit in search_result['hits']['hits']]
    return results
Example #35
def index():
    """Query Elasticsearch using Invenio query syntax."""
    page = request.values.get('page', 1, type=int)
    size = request.values.get('size', 1, type=int)
    query = Query(request.values.get('q', ''))[(page-1)*size:page*size]
    response = current_search_client.search(
        index=request.values.get('index', 'demo'),
        doc_type=request.values.get('type'),
        body=query.body,
    )
    return jsonify(**response)
Example #36
    def get(self, **kwargs):
        """Search records.

        :returns: the search result containing hits and aggregations as
        returned by invenio-search.
        """
        page = request.values.get('page', 1, type=int)
        size = request.values.get('size', 10, type=int)
        if page * size >= self.max_result_window:
            raise MaxResultWindowRESTError()

        # Arguments that must be added in prev/next links
        urlkwargs = dict()

        query, qs_kwargs = self.query_factory(self.search_index, page, size)
        urlkwargs.update(qs_kwargs)

        # Facets
        query, qs_kwargs = self.facets_factory(query, self.search_index)
        urlkwargs.update(qs_kwargs)

        # Sort
        query, qs_kwargs = self.sorter_factory(query, self.search_index)
        urlkwargs.update(qs_kwargs)

        # Execute search
        search_result = current_search_client.search(
            index=self.search_index,
            doc_type=self.search_type,
            body=query.body,
            version=True,
        )

        # Generate links for prev/next
        urlkwargs.update(
            size=size,
            q=request.values.get('q', ''),
            _external=True,
        )
        endpoint = 'invenio_records_rest.{0}_list'.format(self.pid_type)
        links = dict(self=url_for(endpoint, page=page, **urlkwargs))
        if page > 1:
            links['prev'] = url_for(endpoint, page=page-1, **urlkwargs)
        if size * page < int(search_result['hits']['total']) and \
                size * page < self.max_result_window:
            links['next'] = url_for(endpoint, page=page+1, **urlkwargs)

        return self.make_response(
            pid_fetcher=self.pid_fetcher,
            search_result=search_result,
            links=links,
            item_links_factory=self.item_links_factory,
        )
Example #37
    def search(cls, query):
        """Search for objects using the invenio query syntax."""
        from flask import current_app as app
        from invenio_search import Query, current_search_client

        index = app.config['INDEXER_DEFAULT_INDEX']
        res = current_search_client.search(index=index,
                                           body=Query(query).body,
                                           size=1000)

        return [cls.get(x['_id']) for x in res['hits']['hits']
                if x['_score'] > 0.3]
Example #38
def test_cli_purges_db_and_es(app_cli_runner):
    indices = ['holdingpen-hep', 'holdingpen-authors']
    build_workflow({}, data_type='hep')
    build_workflow({}, data_type='authors')

    wf_count = WorkflowObjectModel.query.count()
    assert wf_count == 2

    es.indices.refresh(indices)
    es_result = es.search(indices)
    assert es_result['hits']['total'] == 2

    result = app_cli_runner.invoke(workflows, ['purge', '--yes-i-know'])
    assert result.exit_code == 0

    wf_count = WorkflowObjectModel.query.count()
    assert wf_count == 0

    es.indices.refresh(indices)
    es_result = es.search(indices)
    assert es_result['hits']['total'] == 0
def test_index_data_record(base_app, es_clear, db, datadir, create_record):
    record = create_record("dat")

    expected_count = 1
    expected_metadata = deepcopy(record)
    expected_metadata["_created"] = utils.isoformat(record.created)
    expected_metadata["_updated"] = utils.isoformat(record.updated)

    response = es.search("records-data")

    assert response["hits"]["total"] == expected_count
    assert response["hits"]["hits"][0]["_source"] == expected_metadata
Example #40
def get_records(**kwargs):
    """Get records."""
    page = kwargs.get('resumptionToken', {}).get('page', 1)
    size = current_app.config['OAISERVER_PAGE_SIZE']
    query = Query()[(page-1)*size:page*size]

    body = {}
    if 'set' in kwargs:
        body['must'] = [{'match': {'_oai.sets': kwargs['set']}}]

    time_range = {}
    if 'from_' in kwargs:
        time_range['gte'] = kwargs['from_']
    if 'until' in kwargs:
        time_range['lte'] = kwargs['until']
    if time_range:
        body['filter'] = [{'range': {'_oai.updated': time_range}}]

    if body:
        query.body = {'query': {'bool': body}}

    response = current_search_client.search(
        index=current_app.config['OAISERVER_RECORD_INDEX'],
        body=query.body,
    )

    class Pagination(object):
        """Dummy pagination class."""

        @property
        def total(self):
            """Return number of hits found."""
            return response['hits']['total']

        @property
        def has_next(self):
            """Return True if there are more results."""
            return page*size <= self.total

        @property
        def items(self):
            """Return iterator."""
            for result in response['hits']['hits']:
                yield {
                    'id': result['_id'],
                    'json': result['_source'],
                    # FIXME use ES
                    'updated': RecordMetadata.query.filter_by(
                        id=result['_id']).one().updated,
                }

    return Pagination()
Example #41
def get_records_matching_field(field,
                               id,
                               index=None,
                               doc_type=None,
                               source=None):
    """ Checks if a record with a given ID exists in the index """

    query = {"size": 9999, 'query': {'match': {field: id}}}

    if source:
        query["_source"] = source

    return es.search(index=index, doc_type=doc_type, body=query)
Example #42
def submit_edit_set(spec):
    """Insert a new set."""
    form = get_NewSetForm(request.form)
    if request.method == 'POST' and form.validate():
        old_set = OAISet.query.filter(spec=spec)
        query = Query(old_set.search_pattern)
        old_recid = current_search_client.search(index='records',
                                                 doc_type='record',
                                                 body=query.body,
                                                 fields='_id, oaiid')
        query = Query(form.search_pattern)
        new_recid = current_search_client.search(index='records',
                                                 doc_type='record',
                                                 body=query.body,
                                                 fields='_id, oaiid')
        recids_to_delete = set(old_recid) - set(new_recid)
        # TODO: marks records as deleted from set
        remove_recids_from_set(recids_to_delete)
        add_records_to_set(new_recid)
        flash('Set was changed')
        return redirect(url_for('.manage_sets'))
    return render_template('edit_set.html', edit_set_form=form, spec=spec)
Example #43
def index():
    """Frontpage blueprint."""
    query = Query("")
    query.body["size"] = 10
    query.body["sort"] = [{"creation_date": "desc"}]

    response = current_search_client.search(
        index='records',
        body=query.body,
    )

    return render_template('zenodo_frontpage/index.html',
                           records=(h['_source']
                                    for h in response['hits']['hits']))
Example #44
    def aggregate_and_check_version(expected_version):
        # Aggregate events
        StatAggregator(name='file-download-agg',
                       event='file-download',
                       aggregation_field='file_id',
                       aggregation_interval='day',
                       query_modifiers=[]).run()
        current_search_client.indices.refresh(index='*')
        res = current_search_client.search(
            index='stats-file-download',
            doc_type='file-download-day-aggregation',
            version=True)
        for hit in res['hits']['hits']:
            assert hit['_version'] == expected_version
def test_index_author_record(base_app, es_clear, db, datadir, create_record):
    data = json.loads((datadir / "999108.json").read_text())
    record = create_record("aut", data=data)

    expected_count = 1
    expected_metadata = json.loads(
        (datadir / "999108_expected.json").read_text())
    expected_metadata["_created"] = utils.isoformat(record.created)
    expected_metadata["_updated"] = utils.isoformat(record.updated)

    response = es.search("records-authors")

    assert response["hits"]["total"] == expected_count
    assert response["hits"]["hits"][0]["_source"] == expected_metadata
Example #46
def index():
    """Frontpage blueprint."""
    query = Query("communities:zenodo AND access_right:open")
    query.body["size"] = 10
    query.body["sort"] = [{"creation_date": "desc"}]

    response = current_search_client.search(
        index='records',
        body=query.body,
    )

    return render_template(
        'zenodo_frontpage/index.html',
        records=(h['_source'] for h in response['hits']['hits'])
    )
Example #47
    def get(self, **kwargs):
        """Search records.

        :returns: the search result containing hits and aggregations as
        returned by invenio-search.
        """
        page = request.values.get("page", 1, type=int)
        size = request.values.get("size", 10, type=int)
        if page * size >= self.max_result_window:
            raise MaxResultWindowRESTError()

        # Parse and slice query
        try:
            query = Query(request.values.get("q", ""))[(page - 1) * size : page * size]
        except SyntaxError:
            raise InvalidQueryRESTError()

        # Arguments that must be added in prev/next links
        urlkwargs = dict()

        # Facets
        query, qs_kwargs = self.facets_factory(query, self.search_index)
        urlkwargs.update(qs_kwargs)

        # Sort
        query, qs_kwargs = self.sorter_factory(query, self.search_index)
        urlkwargs.update(qs_kwargs)

        # Execute search
        response = current_search_client.search(
            index=self.search_index, doc_type=self.search_type, body=query.body, version=True
        )

        # Generate links for prev/next
        urlkwargs.update(size=size, q=request.values.get("q", ""), _external=True)
        endpoint = "invenio_records_rest.{0}_list".format(self.pid_type)
        links = dict(self=url_for(endpoint, page=page, **urlkwargs))
        if page > 1:
            links["prev"] = url_for(endpoint, page=page - 1, **urlkwargs)
        if size * page < int(response["hits"]["total"]) and size * page < self.max_result_window:
            links["next"] = url_for(endpoint, page=page + 1, **urlkwargs)

        return self.make_response(pid_fetcher=self.pid_fetcher, search_result=response, links=links)
Example #48
def get_records(page=1):
    """Get records."""
    size = current_app.config['OAISERVER_PAGE_SIZE']
    query = Query()[(page-1)*size:page*size]

    response = current_search_client.search(
        index=current_app.config['OAISERVER_RECORD_INDEX'],
        body=query.body,
        # version=True,
    )

    for result in response['hits']['hits']:
        yield {
            # FIXME
            "id": result['_id'],
            "json": result['_source'],
            # FIXME retrieve from elastic search
            "updated": RecordMetadata.query.filter_by(
                id=result['_id']).one().updated
        }
Example #49
    def _search(self):
        from invenio_search import current_search_client

        if self._results is None:
            if current_app.debug:
                import json
                json_body = json.dumps(self.body, indent=2)
                current_app.logger.debug(
                    "index: {0} - doc_type: {1} - query: {2}".format(
                        self.index,
                        self.doc_type,
                        json_body
                    )
                )
            self._results = current_search_client.search(
                index=self.index,
                doc_type=self.doc_type,
                body=self.body,
            )
        return self._results
Example #50
def _search(query):
    """Make a call to the Elasticsearch instance.

    Receives a query dictionary as the parameter and uses it to query
    the Elasticsearch instance.

    :query:
        The query for the Elasticsearch instance.

        Example:
            query = {"query": {"match_all": {}}}

    :return:
        The Elasticsearch instance response.
    """
    elasticsearch_index = current_app.config.get('DISAMBIGUATION_RECORD_INDEX')

    return es.search(
        index=elasticsearch_index,
        body=query)['hits']['hits']
Example #51
def get_author_collection_records_from_valid_authors(authors_refs):
    """Query elasticsearch for the author of the given authors references."""
    es_query = {
        "filter": {
            "bool": {
                "must": [
                    {"terms": {
                        "self.$ref": authors_refs
                    }}, {"match": {
                        "ids.type": "ORCID"
                    }}
                ]
            }
        }
    }
    authors = current_search_client.search(
        index='records-authors',
        doc_type='authors',
        body=es_query
    )['hits']['hits']
    return authors
Example #52
    def search(self, size=25, page=1, query_string=None, sort_key=None):
        """Return search results for query."""
        # Arguments that must be added in prev/next links
        urlkwargs = dict()

        query, qs_kwargs = self.query_factory(
            self.search_index, page, size, query_string
        )
        urlkwargs.update(qs_kwargs)

        query, qs_kwargs = self.sorter_factory(
            query, self.search_index, sort_key
        )
        urlkwargs.update(qs_kwargs)

        search_result = current_search_client.search(
            index=self.search_index,
            doc_type=self.search_type,
            body=query.body,
            version=True,
        )
        return urlkwargs, search_result
Example #53
def update():
    sets = Set.query.all()
    for oai_set in sets:
        query = Query(oai_set.search_pattern)
        response = current_search_client.search(
            index=oai_set.search_index,
            doc_type=oai_set.search_doc_type,
            body=query.body
        )
        ids = {(a['_id'], _get_oaiid(a)) for a in response['hits']['hits']}

        # get all current records with this set
        current_ids = set()

        # new records that need to be added
        new_ids = ids - current_ids

        # records that were deleted from the set
        del_ids = current_ids - ids

        _add_records_to_set(new_ids, oai_set.spec)
        _del_records_from_set(del_ids, oai_set.spec)
Example #54
    def get(self, **kwargs):
        """Search records.

        :returns: the search result containing hits and aggregations as
        returned by invenio-search.
        """
        page = request.values.get("page", 1, type=int)
        size = request.values.get("size", 10, type=int)
        sort = request.values.get("sort", "", type=str)
        query = Query(request.values.get("q", ""))[(page - 1) * size : page * size]

        for sort_key in sort.split(","):
            if sort_key:
                query = query.sort(sort_key)

        response = current_search_client.search(
            index=self.search_index, doc_type=self.search_type, body=query.body, version=True
        )
        links = {}
        if page > 1:
            links["prev"] = url_for(
                "invenio_records_rest.{0}_list".format(self.pid_type),
                page=page - 1,
                size=size,
                sort=sort,
                q=request.values.get("q", ""),
                _external=True,
            )
        if size * page < int(response["hits"]["total"]):
            links["next"] = url_for(
                "invenio_records_rest.{0}_list".format(self.pid_type),
                page=page + 1,
                size=size,
                sort=sort,
                q=request.values.get("q", ""),
                _external=True,
            )

        return self.make_response(pid_fetcher=self.pid_fetcher, search_result=response, links=links)
def _percolate_query(index, doc_type, percolator_doc_type, document):
    """Get results for a percolate query."""
    if ES_VERSION[0] in (2, 5):
        results = current_search_client.percolate(
            index=index, doc_type=doc_type, allow_no_indices=True,
            ignore_unavailable=True, body={'doc': document}
        )
        return results['matches']
    elif ES_VERSION[0] == 6:
        results = current_search_client.search(
            index=index, doc_type=percolator_doc_type, allow_no_indices=True,
            ignore_unavailable=True, body={
                'query': {
                    'percolate': {
                        'field': 'query',
                        'document_type': percolator_doc_type,
                        'document': document,
                    }
                }
            }
        )
        return results['hits']['hits']
Example #56
    def get_expired_embargos(cls):
        """Get records for which the embargo period have expired."""
        query_str = 'access_right:{0} AND embargo_date:{{* TO {1}}}'.format(
            cls.EMBARGOED,
            datetime.utcnow().isoformat()
        )

        query = Query()
        query.body['from'] = 0
        query.body['size'] = 1000
        query.body['query'] = {
            'query_string': {
                'query': query_str,
                'allow_leading_wildcard': False,
            },
        }

        endpoints = current_app.config['RECORDS_REST_ENDPOINTS']
        index = endpoints['recid']['search_index']

        response = current_search_client.search(
            index=index, body=query.body
        )
        return [hit['_id'] for hit in response['hits']['hits']]
Example #57
    def citations(self):
        """Return citation export for single record."""

        out = []
        row = []

        # Get citations
        es_query = IQ('refersto:' + self.record['control_number'])
        record_citations = es.search(
            index='records-hep',
            doc_type='hep',
            body={"query": es_query.to_dict()},
            size=10,
            _source=[
                'control_number',
                'citation_count',
                'titles',
                'earliest_date'
            ]
        )['hits']['hits']

        for citation in record_citations:

            citation_from_es = es.get_source(index='records-hep',
                                             id=citation['_id'],
                                             doc_type='hep',
                                             ignore=404)

            row.append(render_template_to_string(
                "inspirehep_theme/citations.html",
                record=citation_from_es))
            row.append(citation.get('citation_count', ''))
            out.append(row)
            row = []

        return out
Example #58
def update_record(obj, eng):
    """Updates existing record"""

    doi = get_first_doi(obj)

    query = {'query': {'bool': {'must': [{'match': {'dois.value': doi}}], }}}
    search_result = es.search(index='records-record', doc_type='record-v1.0.0', body=query)

    recid = search_result['hits']['hits'][0]['_source']['control_number']

    obj.extra_data['recid'] = recid
    obj.data['control_number'] = recid

    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    if '_files' in existing_record:
        obj.data['_files'] = existing_record['_files']
    if '_oai' in existing_record:
        obj.data['_oai'] = existing_record['_oai']

    # preserving original creation date
    creation_date = existing_record['record_creation_date']
    obj.data['record_creation_date'] = creation_date
    obj.data['record_creation_year'] = parse_date(creation_date).year
    existing_record.clear()
    existing_record.update(obj.data)

    try:
        existing_record.commit()
        obj.save()
        db.session.commit()
    except ValidationError as err:
        __halt_and_notify("Validation error: %s." % err, eng)
    except SchemaError as err:
        __halt_and_notify('SchemaError during record validation! %s' % err, eng)
Example #59
def get_institution_people_datatables_rows(recid):
    """
    Datatable rows to render people working in an institution.

    :param recid: id of the institution.
    :type recid: string
    """
    query = {
        "query": {
            "term": {
                "authors.affiliations.recid": recid
            }
        },
        "aggs": {
            "authors": {
                "nested": {
                    "path": "authors"
                },
                "aggs": {
                    "affiliated": {
                        "filter": {
                            "term": {"authors.affiliations.recid": recid}
                        },
                        "aggs": {
                            "byrecid": {
                                "terms": {
                                    "field": "authors.recid"
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    records_from_es = current_search_client.search(
        index='records-hep',
        doc_type='hep',
        body=query,
        search_type='count'
    )

    # Extract all the record ids from the aggregation
    papers_per_author = records_from_es[
        'aggregations'
    ]['authors']['affiliated']['byrecid']['buckets']
    recids = [int(paper['key']) for paper in papers_per_author]

    # Generate query to retrieve records from author index
    query = ""
    for i, recid in enumerate(recids):
        query += "recid:{}".format(recid)
        if i != len(recids) - 1:
            query += " OR "

    results = perform_es_search(
        query, 'records-authors', size=9999, fields=['control_number', 'name']
    )
    recid_map = dict(
        [(int(result.control_number), result.name) for result in results]
    )

    result = []
    author_html_link = "<a href='/authors/{recid}'>{name}</a>"
    for author in papers_per_author:
        row = []
        try:
            row.append(
                author_html_link.format(
                    recid=author['key'],
                    name=recid_map[author['key']].preferred_name
                )
            )
        except AttributeError:
            # No preferred name, use value
            row.append(
                author_html_link.format(
                    recid=author['key'],
                    name=recid_map[author['key']].value
                )
            )
        row.append(author['doc_count'])
        result.append(row)

    return result