Exemple #1
0
def formdata_to_model(obj, formdata):
    """Turn submitted author form data into an author record.

    A deep copy of ``formdata`` is cleaned with ``filter_empty_elements``
    and converted with ``updateform.do``; schema, name, private note,
    public email, collection and acquisition source are then filled in.

    :param obj: workflow object. ``data``, ``id`` and ``id_user`` are
        read; ``extra_data["public_email"]`` is written when the form
        carries a public email.
    :param formdata: mapping of submitted form fields (not modified).
    :return: the converted record.
    """
    fields = copy.deepcopy(formdata)
    list_fields = [
        'institution_history', 'advisors', 'websites', 'experiments',
    ]
    filter_empty_elements(fields, list_fields)
    record = updateform.do(fields)

    # Schema: fall back to the one on the workflow object, then expand a
    # bare schema name into a full URL.
    if '$schema' not in record and '$schema' in obj.data:
        record['$schema'] = obj.data.get('$schema')
    if '$schema' in record:
        schema_name = record['$schema']
        if not schema_name.startswith('http'):
            record['$schema'] = url_for(
                'invenio_jsonschemas.get_schema',
                schema_path="records/{0}".format(schema_name))

    # Author name, written as "family, given".
    family_name = fields.get('family_name')
    given_names = fields.get('given_names')
    full_name = (family_name.strip() + ', ') if family_name else ''
    if given_names:
        full_name += given_names
    if full_name:
        # When the record has no 'name' entry the value is written to a
        # temporary dict and therefore not stored.
        name_entry = record.get('name', {})
        name_entry['value'] = full_name

    # Submitter comments become the private note.
    comments = fields.get('extra_comments')
    if comments:
        record['_private_note'] = comments

    # The public email is kept both on the workflow object and the record.
    public_email = fields.get("public_email")
    if public_email:
        obj.extra_data["public_email"] = public_email
        record["public_email"] = public_email

    record["collections"] = [{"primary": "HEPNAMES"}]

    # Owner info: the lookup result has no ``email`` attribute when it is
    # ``None``, in which case the email is left empty.
    try:
        owner_email = User.query.get(obj.id_user).email
    except AttributeError:
        owner_email = ''
    record['acquisition_source'] = {
        'source': 'inspire:uid:{0}'.format(obj.id_user),
        'email': owner_email,
        'date': date.today().isoformat(),
        'method': "submission",
        'submission_number': str(obj.id),
    }
    return record
Exemple #2
0
def formdata_to_model(obj, formdata):
    """Map submitted author form data onto the authors data model.

    Works on a deep copy of ``formdata``: empty list entries are filtered
    out, the copy is converted with ``updateform.do`` and the schema URL,
    name, comments, collection and acquisition source are added.

    :param obj: workflow object. ``id`` and ``id_user`` are read;
        ``extra_data["comments"]`` is written when comments were given.
    :param formdata: mapping of submitted form fields (not modified).
    :return: the converted record.
    """
    submitted = copy.deepcopy(formdata)
    filter_empty_elements(
        submitted,
        ['institution_history', 'advisors', 'websites', 'experiments'],
    )
    model = updateform.do(submitted)

    # Expand a bare schema name into a full schema URL.
    if '$schema' in model:
        current_schema = model['$schema']
        if not current_schema.startswith('http'):
            model['$schema'] = url_for(
                'invenio_jsonschemas.get_schema',
                schema_path="records/{0}".format(current_schema),
            )

    # Assemble "family, given" from whichever parts were submitted.
    name_parts = []
    if submitted.get('family_name'):
        name_parts.append(submitted['family_name'].strip() + ', ')
    if submitted.get('given_names'):
        name_parts.append(submitted['given_names'])
    if name_parts:
        display_name = name_parts[0]
        for part in name_parts[1:]:
            display_name += part
    else:
        display_name = ''

    if display_name:
        # Nothing is stored when the model has no 'name' entry: the value
        # then ends up in a throwaway dict.
        name_field = model.get('name', {})
        name_field['value'] = display_name

    # Comments go to the workflow extra data and to the private note.
    note = submitted.get("comments")
    if note:
        obj.extra_data["comments"] = note
        model["_private_note"] = note

    model["collections"] = [{"primary": "HEPNAMES"}]

    # Owner info: an ``AttributeError`` (e.g. no user found) leaves the
    # email empty.
    try:
        owner_email = User.query.get(obj.id_user).email
    except AttributeError:
        owner_email = ''
    model['acquisition_source'] = {
        'source': ['inspire:uid:{0}'.format(obj.id_user)],
        'email': owner_email,
        'date': date.today().isoformat(),
        'method': "submission",
        'submission_number': str(obj.id),
    }
    return model
Exemple #3
0
def convert_data_to_model(obj, eng):
    """Convert the form data held in ``obj.data`` to the author model.

    The untouched form data is first saved under
    ``obj.extra_data["formdata"]``.  ``obj.data`` is then cleaned,
    converted with ``updateform.do`` and updated in place with the
    converted keys, the author name, the private note, the collection
    and the acquisition source.

    :param obj: workflow object whose ``data`` and ``extra_data`` are
        modified in place.
    :param eng: not used by this function.
    """
    # Keep a pristine copy of the form for later access.
    original_form = copy.deepcopy(obj.data)
    obj.extra_data["formdata"] = copy.deepcopy(original_form)

    list_fields = ['institution_history', 'advisors',
                   'websites', 'experiments']
    filter_empty_elements(obj.data, list_fields)
    obj.data.update(updateform.do(obj.data))

    # Author name as "family, given", read from the pristine copy.
    family = original_form.get('family_name')
    given = original_form.get('given_names')
    composed_name = ''
    if family:
        composed_name = family.strip() + ', '
    if given:
        composed_name += given
    if composed_name:
        # Without an existing 'name' entry the value goes to a temporary
        # dict and is not kept.
        target = obj.data.get('name', {})
        target['value'] = composed_name

    # Comments are kept in the extra data and as private note.
    submitted_comments = original_form.get("comments")
    if submitted_comments:
        obj.extra_data["comments"] = submitted_comments
        obj.data["_private_note"] = submitted_comments

    obj.data["collections"] = [{"primary": "HEPNAMES"}]

    # Owner info: fall back to an empty email on ``AttributeError``.
    try:
        owner_email = User.query.get(obj.id_user).email
    except AttributeError:
        owner_email = ''
    obj.data['acquisition_source'] = {
        'source': ['inspire:uid:{0}'.format(obj.id_user)],
        'email': owner_email,
        'date': date.today().isoformat(),
        'method': "submission",
        'submission_number': obj.id,
    }
Exemple #4
0
def formdata_to_model(obj, formdata):
    """Build and validate an author record from submitted form data.

    A deep copy of ``formdata`` is cleaned of empty list entries and
    converted with ``updateform.do``.  Schema, name, private notes, the
    ``stub`` flag and the submitter's acquisition source are added, then
    the record is passed to ``validate(..., 'authors')``.

    :param obj: workflow object; ``data``, ``id`` and ``id_user`` are
        read.
    :param formdata: mapping of submitted form fields (not modified).
    :return: the converted record.
    """
    fields = copy.deepcopy(formdata)
    filter_empty_elements(
        fields,
        ['institution_history', 'advisors', 'websites', 'experiments'],
    )
    record = updateform.do(fields)

    # Schema: inherit from the workflow object if needed, then turn a
    # bare schema name into a full URL.
    if '$schema' not in record and '$schema' in obj.data:
        record['$schema'] = obj.data.get('$schema')
    if '$schema' in record:
        schema_value = record['$schema']
        if not schema_value.startswith('http'):
            record['$schema'] = url_for(
                'invenio_jsonschemas.get_schema',
                schema_path="records/{0}".format(schema_value),
            )

    # Author name as "family, given".
    family_name = fields.get('family_name')
    given_names = fields.get('given_names')
    full_name = (family_name.strip() + ', ') if family_name else ''
    if given_names:
        full_name += given_names
    if full_name:
        # The value is dropped when the record has no 'name' entry, since
        # it is then written to a temporary dict.
        name_entry = record.get('name', {})
        name_entry['value'] = full_name

    # Submitter comments are appended to the private notes.
    extra_comments = fields.get('extra_comments')
    if extra_comments:
        private_notes = record.setdefault('_private_notes', [])
        private_notes.append({
            'source': 'submitter',
            'value': extra_comments,
        })

    record['stub'] = False

    # Submitter info: email and ORCID default to '' when not found.
    try:
        submitter_email = User.query.get(obj.id_user).email
    except AttributeError:
        submitter_email = ''
    try:
        orcid_query = UserIdentity.query.filter_by(
            id_user=obj.id_user,
            method='orcid',
        )
        submitter_orcid = orcid_query.one().id
    except NoResultFound:
        submitter_orcid = ''
    record['acquisition_source'] = {
        'email': submitter_email,
        'datetime': datetime.datetime.utcnow().isoformat(),
        'method': "submitter",
        'orcid': submitter_orcid,
        'submission_number': str(obj.id),
        'internal_uid': int(obj.id_user),
    }

    # NOTE(review): the helper's return value is not used here, so this
    # only has an effect if it modifies ``record`` in place - confirm.
    strip_empty_values(record)

    validate(record, 'authors')

    return record
Exemple #5
0
def formdata_to_model(obj, formdata):
    """Convert submitted form data into a literature record.

    A deep copy of ``formdata`` is cleaned of empty list entries and fed
    field by field into a ``LiteratureBuilder``.

    :param obj: workflow object; ``id`` and ``id_user`` are read, and
        ``extra_data['submission_pdf']`` is set when the form carries a
        non-arXiv URL.
    :param formdata: mapping of submitted form fields (not modified).
    :return: the record assembled by the builder.
    """
    def _is_arxiv_url(url):
        return 'arxiv.org' in url

    def _affiliations(person):
        # A missing or empty affiliation is handed over as ``None``.
        affiliation = person.get('affiliation')
        return force_list(affiliation) if affiliation else None

    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(form_fields,
                          ['authors', 'supervisors', 'report_numbers'])

    builder = LiteratureBuilder(source='submitter')

    for author in form_fields.get('authors', []):
        builder.add_author(
            builder.make_author(author['full_name'],
                                affiliations=_affiliations(author),
                                roles=['author']))

    for supervisor in form_fields.get('supervisors', []):
        # The supervisor's own affiliation is checked here; looking at the
        # last author instead would mix up affiliations and fail when the
        # submission has supervisors but no authors.
        builder.add_author(
            builder.make_author(supervisor['full_name'],
                                affiliations=_affiliations(supervisor),
                                roles=['supervisor']))

    builder.add_title(title=form_fields.get('title'))

    # A conference name takes precedence over the selected document type.
    document_type = 'conference paper' if form_fields.get('conf_name') \
        else form_fields.get('type_of_doc', [])
    if document_type == 'chapter':
        document_type = 'book chapter'

    builder.add_document_type(document_type=document_type)

    builder.add_abstract(
        abstract=form_fields.get('abstract'),
        source='arXiv' if form_fields.get('categories') else None)

    if form_fields.get('arxiv_id') and form_fields.get('categories'):
        builder.add_arxiv_eprint(
            arxiv_id=form_fields.get('arxiv_id'),
            arxiv_categories=form_fields.get('categories').split())

    builder.add_doi(doi=form_fields.get('doi'))

    builder.add_inspire_categories(
        subject_terms=form_fields.get('subject_term'), source='user')

    for key in ('extra_comments', 'nonpublic_note', 'hidden_notes',
                'conf_name'):
        builder.add_private_note(private_notes=form_fields.get(key))

    # A year that cannot be parsed as an integer is dropped.
    year = form_fields.get('year')
    try:
        year = int(year)
    except (TypeError, ValueError):
        year = None

    builder.add_preprint_date(
        preprint_date=form_fields.get('preprint_created'))

    if form_fields.get('type_of_doc') == 'thesis':
        builder.add_thesis(defense_date=form_fields.get('defense_date'),
                           degree_type=form_fields.get('degree_type'),
                           institution=form_fields.get('institution'),
                           date=form_fields.get('thesis_date'))

    if form_fields.get('type_of_doc') == 'chapter':
        if not form_fields.get('journal_title'):
            builder.add_book_series(title=form_fields.get('series_title'))

    if form_fields.get('type_of_doc') == 'book':
        if form_fields.get('journal_title'):
            # With a journal title the series volume is used as the
            # journal volume of the publication info added below.
            form_fields['volume'] = form_fields.get('series_volume')
        else:
            builder.add_book_series(title=form_fields.get('series_title'),
                                    volume=form_fields.get('series_volume'))
        builder.add_book(publisher=form_fields.get('publisher_name'),
                         place=form_fields.get('publication_place'),
                         date=form_fields.get('publication_date'))

    builder.add_publication_info(
        year=year,
        cnum=form_fields.get('conference_id'),
        journal_issue=form_fields.get('issue'),
        journal_title=form_fields.get('journal_title'),
        journal_volume=form_fields.get('volume'),
        page_start=form_fields.get('start_page'),
        page_end=form_fields.get('end_page'),
        artid=form_fields.get('artid'),
        parent_record=form_fields.get('parent_book'))

    builder.add_accelerator_experiments_legacy_name(
        legacy_name=form_fields.get('experiment'))

    language = form_fields.get('other_language') \
        if form_fields.get('language') == 'oth' \
        else form_fields.get('language')
    builder.add_language(language=language)

    if form_fields.get('title_translation'):
        builder.add_title_translation(
            title=form_fields['title_translation'],
            language='en',
        )

    builder.add_title(title=form_fields.get('title_arXiv'), source='arXiv')

    builder.add_title(title=form_fields.get('title_crossref'),
                      source='crossref')

    builder.add_license(url=form_fields.get('license_url'))

    builder.add_public_note(public_note=form_fields.get('public_notes'))

    builder.add_public_note(
        public_note=form_fields.get('note'),
        source='arXiv' if form_fields.get('categories') else 'CrossRef')

    # A non-arXiv main URL is remembered as the submitted PDF; it is only
    # added as record URL when no additional URL was given.
    form_url = form_fields.get('url')
    form_additional_url = form_fields.get('additional_url')
    if form_url and not _is_arxiv_url(form_url):
        obj.extra_data['submission_pdf'] = form_url
        if not form_additional_url:
            builder.add_url(url=form_url)

    if form_additional_url and not _is_arxiv_url(form_additional_url):
        builder.add_url(url=form_additional_url)

    for report_number in form_fields.get('report_numbers', []):
        builder.add_report_number(
            report_number=report_number.get('report_number'))

    builder.add_collaboration(collaboration=form_fields.get('collaboration'))

    builder.add_acquisition_source(
        datetime=datetime.datetime.utcnow().isoformat(),
        submission_number=obj.id,
        internal_uid=int(obj.id_user),
        email=form_fields.get('email'),
        orcid=form_fields.get('orcid'),
        method='submitter')

    return builder.record
Exemple #6
0
def formdata_to_model(obj, formdata):
    """Convert submitted form data into a validated literature record.

    A deep copy of ``formdata`` is cleaned of empty list entries and fed
    field by field into a ``LiteratureBuilder``; the builder validates
    the record before it is returned.

    :param obj: workflow object; ``id`` and ``id_user`` are read, and
        ``extra_data['submission_pdf']`` is set when the form carries a
        non-arXiv URL.
    :param formdata: mapping of submitted form fields (not modified).
    :return: the record assembled by the builder.
    """
    def _is_arxiv_url(url):
        return 'arxiv.org' in url

    def _affiliations(person):
        # A missing or empty affiliation is handed over as ``None``.
        affiliation = person.get('affiliation')
        return force_list(affiliation) if affiliation else None

    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(
        form_fields, ['authors', 'supervisors', 'report_numbers']
    )

    builder = LiteratureBuilder(source='submitter')

    for author in form_fields.get('authors', []):
        builder.add_author(builder.make_author(
            author['full_name'],
            affiliations=_affiliations(author),
            roles=['author']
        ))

    for supervisor in form_fields.get('supervisors', []):
        # The supervisor's own affiliation is checked here; looking at the
        # last author instead would mix up affiliations and fail when the
        # submission has supervisors but no authors.
        builder.add_author(builder.make_author(
            supervisor['full_name'],
            affiliations=_affiliations(supervisor),
            roles=['supervisor']
        ))

    builder.add_title(title=form_fields.get('title'))

    # A conference name takes precedence over the selected document type.
    document_type = 'conference paper' if form_fields.get('conf_name') \
        else form_fields.get('type_of_doc', [])

    builder.add_document_type(
        document_type=document_type
    )

    builder.add_abstract(
        abstract=form_fields.get('abstract'),
        source='arXiv' if form_fields.get('categories') else None
    )

    if form_fields.get('arxiv_id') and form_fields.get('categories'):
        builder.add_arxiv_eprint(
            arxiv_id=form_fields.get('arxiv_id'),
            arxiv_categories=form_fields.get('categories').split()
        )

    builder.add_doi(doi=form_fields.get('doi'))

    builder.add_inspire_categories(
        subject_terms=form_fields.get('subject_term'),
        source='user'
    )

    for key in ('extra_comments', 'nonpublic_note',
                'hidden_notes', 'conf_name', 'references'):
        builder.add_private_note(
            private_notes=form_fields.get(key)
        )

    # A year that cannot be parsed as an integer is dropped.
    year = form_fields.get('year')
    try:
        year = int(year)
    except (TypeError, ValueError):
        year = None

    builder.add_publication_info(
        year=year,
        cnum=form_fields.get('conference_id'),
        journal_issue=form_fields.get('issue'),
        journal_title=form_fields.get('journal_title'),
        journal_volume=form_fields.get('volume'),
        page_start=form_fields.get('page_start'),
        page_end=form_fields.get('page_end'),
        artid=form_fields.get('artid')
    )

    builder.add_preprint_date(
        preprint_date=form_fields.get('preprint_created')
    )

    if form_fields.get('type_of_doc') == 'thesis':
        builder.add_thesis(
            defense_date=form_fields.get('defense_date'),
            degree_type=form_fields.get('degree_type'),
            institution=form_fields.get('institution'),
            date=form_fields.get('thesis_date')
        )

    builder.add_accelerator_experiments_legacy_name(
        legacy_name=form_fields.get('experiment')
    )

    language = form_fields.get('other_language') \
        if form_fields.get('language') == 'oth' \
        else form_fields.get('language')
    builder.add_language(language=language)

    builder.add_title_translation(title=form_fields.get('title_translation'))

    builder.add_title(
        title=form_fields.get('title_arXiv'),
        source='arXiv'
    )

    builder.add_title(
        title=form_fields.get('title_crossref'),
        source='crossref'
    )

    builder.add_license(url=form_fields.get('license_url'))

    builder.add_public_note(public_note=form_fields.get('public_notes'))

    builder.add_public_note(
        public_note=form_fields.get('note'),
        source='arXiv' if form_fields.get('categories') else 'CrossRef'
    )

    # A non-arXiv main URL is remembered as the submitted PDF; it is only
    # added as record URL when no additional URL was given.
    form_url = form_fields.get('url')
    form_additional_url = form_fields.get('additional_url')
    if form_url and not _is_arxiv_url(form_url):
        obj.extra_data['submission_pdf'] = form_url
        if not form_additional_url:
            builder.add_url(url=form_url)

    if form_additional_url and not _is_arxiv_url(form_additional_url):
        builder.add_url(url=form_additional_url)

    for report_number in form_fields.get('report_numbers', []):
        builder.add_report_number(
            report_number=report_number.get('report_number')
        )

    builder.add_collaboration(collaboration=form_fields.get('collaboration'))

    builder.add_acquisition_source(
        datetime=datetime.datetime.utcnow().isoformat(),
        submission_number=obj.id,
        internal_uid=int(obj.id_user),
        email=form_fields.get('email'),
        orcid=form_fields.get('orcid'),
        method='submitter'
    )
    builder.validate_record()

    return builder.record
Exemple #7
0
def formdata_to_model(obj, formdata):
    """Convert submitted form data into a literature record.

    A deep copy of ``formdata`` is cleaned of empty list entries and
    converted with ``literature.do``; fields that are computed or depend
    on other fields (schema URL, collections, title sources, hidden
    notes, page count, language, acquisition source) are added afterwards.

    :param obj: workflow object; ``id`` and ``id_user`` are read and
        ``extra_data["submission_data"]`` is reset and filled with the
        submitted references, extra comments and PDF when present.
    :param formdata: mapping of submitted form fields (not modified).
    :return: the converted record.
    """
    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(
        form_fields, ['authors', 'supervisors', 'report_numbers']
    )

    obj.extra_data["submission_data"] = {}

    data = literature.do(form_fields)

    # ======
    # Schema
    # ======
    if '$schema' in data and not data['$schema'].startswith('http'):
        data['$schema'] = url_for(
            'invenio_jsonschemas.get_schema',
            schema_path="records/{0}".format(data['$schema'])
        )

    # An absent or empty publication info list is treated as a single
    # empty entry so that the lookups below never fail.
    first_publication_info = (data.get('publication_info') or [{}])[0]

    # ============================
    # Collection
    # ============================
    data['collections'] = [{'primary': "HEP"}]
    if form_fields.get('type_of_doc') == 'thesis':
        data['collections'].append({'primary': "THESIS"})

    # Check if it was imported from arXiv
    if any(category.get("scheme") == "arXiv"
           for category in data.get("field_categories") or []):
        data['collections'].extend([{'primary': "arXiv"},
                                    {'primary': "Citeable"}])
        # Add arXiv as source
        if data.get("abstracts"):
            data['abstracts'][0]['source'] = 'arXiv'
        if form_fields.get("arxiv_id"):
            data['external_system_numbers'] = [{
                'value': 'oai:arXiv.org:' + form_fields['arxiv_id'],
                'institute': 'arXiv'
            }]

    published_keys = ('year', 'journal_issue', 'journal_volume',
                      'page_start', 'page_end', 'artid')
    if all(key in first_publication_info for key in published_keys):
        # NOTE: Only peer reviewed journals should have this collection
        # we are adding it here but ideally should be manually added
        # by a curator.
        data['collections'].append({'primary': "Published"})
        # Add Citeable collection if not present
        if not any(collection['primary'] == "Citeable"
                   for collection in data['collections']):
            data['collections'].append({'primary': "Citeable"})

    # ============================
    # Title source and cleanup
    # ============================
    # Work on a local list so that a record without any title does not
    # raise; the list is only stored back when it holds something.
    titles = data.get('titles') or []
    title = ""
    if titles and 'title' in titles[0]:
        # Clean up all extra spaces in title
        titles[0]['title'] = " ".join(titles[0]['title'].split())
        title = titles[0]['title']

    for form_key, title_source in (('title_arXiv', "arXiv"),
                                   ('title_crossref', "CrossRef")):
        imported_title = " ".join((form_fields.get(form_key) or "").split())
        if not imported_title:
            continue
        if title == imported_title:
            titles[0]["source"] = title_source
        else:
            titles.append({
                'title': imported_title,
                'source': title_source
            })

    if titles:
        # Title has no source, so should be the submitter
        titles[0].setdefault('source', "submitter")
        data['titles'] = titles

    # ============================
    # Conference name
    # ============================
    if 'conf_name' in form_fields:
        hidden_notes = data.setdefault("hidden_notes", [])
        hidden_notes.append({"value": form_fields['conf_name']})
        if 'nonpublic_note' in form_fields:
            hidden_notes.append({'value': form_fields['nonpublic_note']})
        data['collections'].append({'primary': "ConferencePaper"})

    # ============================
    # Page number
    # ============================
    if 'page_nr' not in data:
        page_start = first_publication_info.get('page_start')
        page_end = first_publication_info.get('page_end')

        if page_start and page_end:
            # Non-numeric page values simply leave the count unset.
            try:
                data['page_nr'] = int(page_end) - int(page_start) + 1
            except (TypeError, ValueError):
                pass

    # ============================
    # Language
    # ============================
    if data.get("languages", []) and data["languages"][0] == "oth":
        if form_fields.get("other_language"):
            data["languages"] = [form_fields["other_language"]]

    # ==========
    # Owner Info
    # ==========
    # TODO Make sure we are getting the email correctly
    userid = obj.id_user
    try:
        email = User.query.get(userid).email
    except AttributeError:
        email = ''
    try:
        # TODO Make sure we are getting the ORCID id correctly
        identity = UserIdentity.query.filter_by(
            id_user=userid, method='orcid'
        ).one()
    except NoResultFound:
        source = ''
    else:
        source = identity.method + ':' + identity.id
    data['acquisition_source'] = dict(
        source=source,
        email=email,
        date=date.today().isoformat(),
        method="submission",
        submission_number=str(obj.id),
    )

    # ==============
    # References
    # ==============
    if form_fields.get('references'):
        obj.extra_data["submission_data"]['references'] = \
            form_fields.get('references')

    # ==============
    # Extra comments
    # ==============
    if form_fields.get('extra_comments'):
        data.setdefault('hidden_notes', []).append({
            'value': form_fields['extra_comments'],
            'source': 'submitter'
        })
        obj.extra_data["submission_data"]["extra_comments"] = \
            form_fields.get("extra_comments")

    # TODO convert journal titles using journal records

    if 'pdf' in data:
        obj.extra_data["submission_data"]["pdf"] = data.pop("pdf")

    # Finally, return the converted data
    return data
def formdata_to_model(obj, formdata):
    """Convert submitted form data into a literature record.

    A deep copy of ``formdata`` is cleaned of empty list entries and
    converted with ``literature.do``; fields that are computed or depend
    on other fields (schema URL, collections, title sources, hidden
    notes, page count, language, acquisition source) are added afterwards.

    :param obj: workflow object; ``id`` and ``id_user`` are read and
        ``extra_data["submission_data"]`` is reset and filled with the
        submitted references, extra comments and PDF when present.
    :param formdata: mapping of submitted form fields (not modified).
    :return: the converted record.
    """
    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(form_fields,
                          ['authors', 'supervisors', 'report_numbers'])

    obj.extra_data["submission_data"] = {}

    data = literature.do(form_fields)

    # ======
    # Schema
    # ======
    if '$schema' in data and not data['$schema'].startswith('http'):
        data['$schema'] = url_for('invenio_jsonschemas.get_schema',
                                  schema_path="records/{0}".format(
                                      data['$schema']))

    # An absent or empty publication info list is treated as a single
    # empty entry so that the lookups below never fail.
    first_publication_info = (data.get('publication_info') or [{}])[0]

    # ============================
    # Collection
    # ============================
    data['collections'] = [{'primary': "HEP"}]
    if form_fields.get('type_of_doc') == 'thesis':
        data['collections'].append({'primary': "THESIS"})

    # Check if it was imported from arXiv
    if any(category.get("scheme") == "arXiv"
           for category in data.get("field_categories") or []):
        data['collections'].extend([{
            'primary': "arXiv"
        }, {
            'primary': "Citeable"
        }])
        # Add arXiv as source
        if data.get("abstracts"):
            data['abstracts'][0]['source'] = 'arXiv'
        if form_fields.get("arxiv_id"):
            data['external_system_numbers'] = [{
                'value': 'oai:arXiv.org:' + form_fields['arxiv_id'],
                'institute': 'arXiv'
            }]

    published_keys = ('year', 'journal_issue', 'journal_volume',
                      'page_start', 'page_end', 'artid')
    if all(key in first_publication_info for key in published_keys):
        # NOTE: Only peer reviewed journals should have this collection
        # we are adding it here but ideally should be manually added
        # by a curator.
        data['collections'].append({'primary': "Published"})
        # Add Citeable collection if not present
        if not any(collection['primary'] == "Citeable"
                   for collection in data['collections']):
            data['collections'].append({'primary': "Citeable"})

    # ============================
    # Title source and cleanup
    # ============================
    # Work on a local list so that a record without any title does not
    # raise; the list is only stored back when it holds something.
    titles = data.get('titles') or []
    title = ""
    if titles and 'title' in titles[0]:
        # Clean up all extra spaces in title
        titles[0]['title'] = " ".join(titles[0]['title'].split())
        title = titles[0]['title']

    for form_key, title_source in (('title_arXiv', "arXiv"),
                                   ('title_crossref', "CrossRef")):
        imported_title = " ".join((form_fields.get(form_key) or "").split())
        if not imported_title:
            continue
        if title == imported_title:
            titles[0]["source"] = title_source
        else:
            titles.append({'title': imported_title, 'source': title_source})

    if titles:
        # Title has no source, so should be the submitter
        titles[0].setdefault('source', "submitter")
        data['titles'] = titles

    # ============================
    # Conference name
    # ============================
    if 'conf_name' in form_fields:
        hidden_notes = data.setdefault("hidden_notes", [])
        hidden_notes.append({"value": form_fields['conf_name']})
        if 'nonpublic_note' in form_fields:
            hidden_notes.append({'value': form_fields['nonpublic_note']})
        data['collections'].append({'primary': "ConferencePaper"})

    # ============================
    # Page number
    # ============================
    if 'page_nr' not in data:
        page_start = first_publication_info.get('page_start')
        page_end = first_publication_info.get('page_end')

        if page_start and page_end:
            # Non-numeric page values simply leave the count unset.
            try:
                data['page_nr'] = int(page_end) - int(page_start) + 1
            except (TypeError, ValueError):
                pass

    # ============================
    # Language
    # ============================
    if data.get("languages", []) and data["languages"][0] == "oth":
        if form_fields.get("other_language"):
            data["languages"] = [form_fields["other_language"]]

    # ==========
    # Owner Info
    # ==========
    # TODO Make sure we are getting the email correctly
    userid = obj.id_user
    try:
        email = User.query.get(userid).email
    except AttributeError:
        email = ''
    try:
        # TODO Make sure we are getting the ORCID id correctly
        identity = UserIdentity.query.filter_by(id_user=userid,
                                                method='orcid').one()
    except NoResultFound:
        source = ''
    else:
        source = identity.method + ':' + identity.id
    data['acquisition_source'] = dict(
        source=source,
        email=email,
        date=date.today().isoformat(),
        method="submission",
        submission_number=str(obj.id),
    )

    # ==============
    # References
    # ==============
    if form_fields.get('references'):
        obj.extra_data["submission_data"]['references'] = form_fields.get(
            'references')

    # ==============
    # Extra comments
    # ==============
    if form_fields.get('extra_comments'):
        data.setdefault('hidden_notes', []).append({
            'value': form_fields['extra_comments'],
            'source': 'submitter'
        })
        obj.extra_data["submission_data"]["extra_comments"] = form_fields.get(
            "extra_comments")

    # TODO convert journal titles using journal records

    if 'pdf' in data:
        obj.extra_data["submission_data"]["pdf"] = data.pop("pdf")

    # Finally, return the converted data
    return data
Exemple #9
0
def formdata_to_model(obj, formdata):
    """Manipulate form data to match authors data model.

    A deep copy of ``formdata`` is cleaned with ``filter_empty_elements``
    and converted with ``updateform.do``; the resulting record is then
    completed with the collection, schema, author name, submitter note,
    ``stub`` flag and acquisition source before empty values are stripped.

    :param obj: workflow object; ``obj.data``, ``obj.id`` and
        ``obj.id_user`` are read.
    :param formdata: submitted form values; only a deep copy is modified.
    :return: the converted record, as returned by ``strip_empty_values``.
    """
    form_fields = copy.deepcopy(formdata)

    filter_empty_elements(
        form_fields,
        ['institution_history', 'advisors',
         'websites', 'experiments']
    )
    data = updateform.do(form_fields)

    # ===========
    # Collections
    # ===========
    data['_collections'] = ['Authors']

    # ======
    # Schema
    # ======

    # FIXME it's not clear whether $schema is ever present at this stage
    if '$schema' not in data and '$schema' in obj.data:
        data['$schema'] = obj.data.get('$schema')
    if '$schema' in data:
        ensure_valid_schema(data)

    # ===========
    # Author name
    # ===========
    # Build "Family, Given" from whichever parts are actually filled in, so
    # that a missing part never leaves a dangling ", " separator behind.
    name_parts = [
        (form_fields.get(field) or '').strip()
        for field in ('family_name', 'given_names')
    ]
    author_name = ', '.join(part for part in name_parts if part)

    if author_name:
        # setdefault (not get) so the value is stored even when the
        # conversion above produced no 'name' entry at all.
        data.setdefault('name', {})['value'] = author_name

    # Add comments to extra data
    if form_fields.get('extra_comments'):
        data.setdefault('_private_notes', []).append({
            'source': 'submitter',
            'value': form_fields['extra_comments']
        })

    data['stub'] = False

    # ==============
    # Submitter Info
    # ==============
    try:
        user_email = User.query.get(obj.id_user).email
    except AttributeError:
        # The lookup returned an object without ``email`` (e.g. None).
        user_email = ''
    try:
        orcid = UserIdentity.query.filter_by(
            id_user=obj.id_user,
            method='orcid'
        ).one().id
    except NoResultFound:
        # The submitter has no linked ORCID identity.
        orcid = ''
    data['acquisition_source'] = dict(
        email=user_email,
        datetime=datetime.datetime.utcnow().isoformat(),
        method="submitter",
        orcid=orcid,
        submission_number=str(obj.id),
        internal_uid=int(obj.id_user),
    )

    data = strip_empty_values(data)

    return data
Exemple #10
0
def formdata_to_model(obj, formdata):
    """Manipulate form data to match authors data model.

    A deep copy of ``formdata`` is cleaned with ``filter_empty_elements``
    and converted with ``updateform.do``; the resulting record is then
    completed with the collection, schema URL, author name, submitter note,
    ``stub`` flag and acquisition source. Empty values are stripped and the
    record is passed to ``validate(data, 'authors')`` before being returned.

    :param obj: workflow object; ``obj.data``, ``obj.id`` and
        ``obj.id_user`` are read.
    :param formdata: submitted form values; only a deep copy is modified.
    :return: the converted record, as returned by ``strip_empty_values``.
    """
    form_fields = copy.deepcopy(formdata)

    filter_empty_elements(
        form_fields,
        ['institution_history', 'advisors', 'websites', 'experiments'])
    data = updateform.do(form_fields)

    # ===========
    # Collections
    # ===========
    data['_collections'] = ['Authors']

    # ======
    # Schema
    # ======
    if '$schema' not in data and '$schema' in obj.data:
        data['$schema'] = obj.data.get('$schema')

    # A schema that is not already an http(s) URL is treated as a path
    # below "records/" and resolved through the jsonschemas endpoint.
    if '$schema' in data and not data['$schema'].startswith('http'):
        data['$schema'] = url_for('invenio_jsonschemas.get_schema',
                                  schema_path="records/{0}".format(
                                      data['$schema']))

    # ===========
    # Author name
    # ===========
    # Build "Family, Given" from whichever parts are actually filled in, so
    # that a missing part never leaves a dangling ", " separator behind.
    name_parts = [
        (form_fields.get(field) or '').strip()
        for field in ('family_name', 'given_names')
    ]
    author_name = ', '.join(part for part in name_parts if part)

    if author_name:
        # setdefault (not get) so the value is stored even when the
        # conversion above produced no 'name' entry at all.
        data.setdefault('name', {})['value'] = author_name

    # Add comments to extra data
    if form_fields.get('extra_comments'):
        data.setdefault('_private_notes', []).append({
            'source': 'submitter',
            'value': form_fields['extra_comments'],
        })

    data['stub'] = False

    # ==============
    # Submitter Info
    # ==============
    try:
        user_email = User.query.get(obj.id_user).email
    except AttributeError:
        # The lookup returned an object without ``email`` (e.g. None).
        user_email = ''
    try:
        orcid = UserIdentity.query.filter_by(id_user=obj.id_user,
                                             method='orcid').one().id
    except NoResultFound:
        # The submitter has no linked ORCID identity.
        orcid = ''
    data['acquisition_source'] = dict(
        email=user_email,
        datetime=datetime.datetime.utcnow().isoformat(),
        method="submitter",
        orcid=orcid,
        submission_number=str(obj.id),
        internal_uid=int(obj.id_user),
    )

    data = strip_empty_values(data)

    validate(data, 'authors')

    return data
Exemple #11
0
def formdata_to_model(obj, formdata):
    """Manipulate form data to match literature data model.

    A deep copy of ``formdata`` is cleaned with ``filter_empty_elements``
    and converted with ``literature.do``. The record is then enriched with
    fields that are computed or depend on other fields: schema URL,
    collections, title sources, conference notes, page count, language,
    acquisition source, submitter comments and a normalized journal title.

    :param obj: workflow object; ``obj.id`` and ``obj.id_user`` are read.
    :param formdata: submitted form values; only a deep copy is modified.
    :return: the converted record.
    """
    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(form_fields,
                          ['authors', 'supervisors', 'report_numbers'])

    data = literature.do(form_fields)

    # ======
    # Schema
    # ======
    if '$schema' in data and not data['$schema'].startswith('http'):
        jsonschemas_ext = current_app.extensions.get('invenio-jsonschemas')
        data['$schema'] = jsonschemas_ext.path_to_url(
            "records/{0}".format(data['$schema']))

    # ============================
    # Collection
    # ============================
    data['collections'] = [{'primary': "HEP"}]
    # .get: a form without a document type is simply not a thesis.
    if form_fields.get('type_of_doc') == 'thesis':
        data['collections'].append({'primary': "THESIS"})

    if get_value(form_fields, "arxiv_eprints.categories", None):
        # Check if it was imported from arXiv
        data['collections'].extend([
            {'primary': "arXiv"},
            {'primary': "Citeable"},
        ])
        # Add arXiv as source
        if data.get("abstracts"):
            data['abstracts'][0]['source'] = 'arXiv'
        if form_fields.get("arxiv_id"):
            data['external_system_numbers'] = [{
                'value': 'oai:arXiv.org:' + form_fields['arxiv_id'],
                'institute': 'arXiv',
            }]

    # Truthiness check so that an empty publication_info list is skipped
    # instead of failing on the [0] lookup.
    if data.get("publication_info"):
        first_pub = data['publication_info'][0]

        has_pub_info = all(
            key in first_pub
            for key in ('year', 'journal_issue', 'journal_volume'))
        has_page_or_artid = any(
            key in first_pub
            for key in ('page_start', 'page_end', 'artid'))

        if has_pub_info and has_page_or_artid:
            # NOTE: Only peer reviewed journals should have this collection
            # we are adding it here but ideally should be manually added
            # by a curator.
            data['collections'].append({'primary': "Published"})
            # Add Citeable collection if not present
            collections = [x['primary'] for x in data['collections']]
            if "Citeable" not in collections:
                data['collections'].append({'primary': "Citeable"})

    # ============================
    # Title source and cleanup
    # ============================
    try:
        # Clean up all extra spaces in title
        data['titles'][0]['title'] = " ".join(
            data['titles'][0]['title'].split())
        title = data['titles'][0]['title']
    except (KeyError, IndexError):
        title = ""

    # Titles fetched from arXiv / CrossRef either confirm the source of the
    # submitted title (when identical) or are recorded as extra titles.
    for form_key, title_source in (('title_arXiv', "arXiv"),
                                   ('title_crossref', "CrossRef")):
        imported_title = form_fields.get(form_key)
        if not imported_title:
            continue
        imported_title = " ".join(imported_title.split())
        if title == imported_title and data.get('titles'):
            data['titles'][0]["source"] = title_source
        else:
            # setdefault: the record may not have any title yet.
            data.setdefault('titles', []).append({
                'title': imported_title,
                'source': title_source,
            })

    if data.get('titles'):
        # Title has no source, so should be the submitter
        data['titles'][0].setdefault('source', "submitter")

    # ============================
    # Conference name
    # ============================
    if 'conf_name' in form_fields:
        hidden_notes = data.setdefault("hidden_notes", [])
        hidden_notes.append({"value": form_fields['conf_name']})
        if 'nonpublic_note' in form_fields:
            hidden_notes.append({'value': form_fields['nonpublic_note']})
        data['collections'].append({'primary': "ConferencePaper"})

    # ============================
    # Page number
    # ============================
    if 'page_nr' not in data:
        first_publication_info = (data.get('publication_info') or [{}])[0]

        page_start = first_publication_info.get('page_start')
        page_end = first_publication_info.get('page_end')

        if page_start and page_end:
            try:
                data['page_nr'] = int(page_end) - int(page_start) + 1
            except (TypeError, ValueError):
                # Non-numeric page values: leave the page count unset.
                pass

    # ============================
    # Language
    # ============================
    if form_fields.get('language') == 'oth':
        if form_fields.get("other_language"):
            data["languages"] = [form_fields["other_language"]]

    # ==========
    # Owner Info
    # ==========
    # TODO Make sure we are getting the email correctly
    userid = obj.id_user
    try:
        email = User.query.get(userid).email
    except AttributeError:
        # The lookup returned an object without ``email`` (e.g. None).
        email = ''
    try:
        # TODO Make sure we are getting the ORCID id correctly
        identity = UserIdentity.query.filter_by(id_user=userid,
                                                method='orcid').one()
    except NoResultFound:
        source = ''
    else:
        source = identity.method + ':' + identity.id
    data['acquisition_source'] = dict(
        source=source,
        email=email,
        date=date.today().isoformat(),
        method="submission",
        submission_number=str(obj.id),
    )

    # ==============
    # Extra comments
    # ==============
    if form_fields.get('extra_comments'):
        data.setdefault('hidden_notes', []).append({
            'value': form_fields['extra_comments'],
            'source': 'submitter',
        })

    # ==========================
    # Journal name normalization
    # ==========================
    journal_title = get_value(data, 'publication_info[0].journal_title')
    if journal_title:
        hits = JournalsSearch().query(
            'match',
            title_variants__title__lowercased=journal_title).execute()

        if hits:
            try:
                short_title = hits[0].short_titles[0].title
                data['publication_info'][0]['journal_title'] = short_title
            except (AttributeError, IndexError):
                # First hit has no usable short title: keep the title as
                # submitted.
                pass

    # Finally, return the converted data
    return data