コード例 #1
0
        # 773 RefExtract PubNote extraction
        for field in record_get_field_instances(record, '773'):
            for value in field_get_subfield_values(field, 'x'):
                extract = extract_journal_reference(value)
                if extract:
                    subfields = [('x', value)]
                    if extract.get('volume', False):
                        subfields.append(('v', str(extract['volume'])))
                    if extract.get('title', False):
                        subfields.append(('p', str(extract['title'])))
                    if extract.get('year', False):
                        subfields.append(('y', str(extract['year'])))
                    if extract.get('page', False):
                        subfields.append(('c', str(extract['page'])))
                    new_field = create_field(subfields,
                                             global_position=field[4])
                    record_replace_field(record, '773', new_field, field[4])
                    break

        if not recid or recid == -1:
            # Record (probably) does not exist, flag for inserting into database
            # FIXME: Add some automatic deny/accept parameters, perhaps also bibmatch call
            insert_records.append(record)
        else:
            # Record exists, fetch existing record
            existing_record = get_record(recid)
            if existing_record is None:
                # Did not find existing record in database
                holdingpen_records.append(record)
                continue
コード例 #2
0
        # 773 RefExtract PubNote extraction
        for field in record_get_field_instances(record, '773'):
            for value in field_get_subfield_values(field, 'x'):
                extract = extract_journal_reference(value)
                if extract:
                    subfields = [('x', value)]
                    if extract.get('volume', False):
                        subfields.append(('v', str(extract['volume'])))
                    if extract.get('title', False):
                        subfields.append(('p', str(extract['title'])))
                    if extract.get('year', False):
                        subfields.append(('y', str(extract['year'])))
                    if extract.get('page', False):
                        subfields.append(('c', str(extract['page'])))
                    new_field = create_field(subfields, global_position=field[4])
                    record_replace_field(record, '773', new_field, field[4])
                    break

        # 710 Remove collaboration from a 710__g
        for field in record_get_field_instances(record, '710'):
            for value in field_get_subfield_values(field, 'g'):
                subfields = [('g', value.replace("collaboration", "").replace("Collaboration", "").strip())]
                new_field = create_field(subfields, global_position=field[4])
                record_replace_field(record, '710', new_field, field[4])
                break

        if not recid or recid == -1:
            # Record (probably) does not exist, flag for inserting into database
            # FIXME: Add some automatic deny/accept parameters, perhaps also bibmatch call
            insert_records.append(record)
コード例 #3
0
def apply_filter(rec):
    """ Filters the record to be compatible within Inspire
    Parameters:
     * rec - dictionary: BibRecord structure
    Returns: dictionary, BibRecord structure
    """
    # Move recid from 001 to 035 if not hidden
    cds_id = rec['001'][0][3]
    if not 'hidden' in [
            x.lower() for x in record_get_field_values(rec, "980", code="a")
    ]:
        record_add_field(rec, '035', subfields=[('9', 'CDS'), ('a', cds_id)])
    # Clear control fields
    record_strip_controlfields(rec)

    # Clear other uninteresting fields
    interesting_fields = [
        "024", "041", "035", "037", "088", "100", "110", "111", "242", "245",
        "246", "260", "269", "300", "502", "650", "653", "693", "700", "710",
        "773", "856", "520", "500", "980"
    ]
    for tag in rec.keys():
        if tag not in interesting_fields:
            record_delete_fields(rec, tag)

    # 980 Determine Collections
    collections = set([])
    for value in record_get_field_values(rec, '980', code='a'):
        if 'NOTE' in value.upper():
            collections.add('NOTE')
        if 'THESIS' in value.upper():
            collections.add('THESIS')
        if 'CONFERENCEPAPER' in value.upper():
            collections.add('ConferencePaper')

    if is_published(rec):
        collections.add("PUBLISHED")
        collections.add("CITEABLE")

    if not 'NOTE' in collections:
        # TODO: Move this to a KB
        kb = [
            'ATLAS-CONF-', 'CMS-PAS-', 'ATL-', 'CMS-DP-', 'ALICE-INT-',
            'LHCb-PUB-'
        ]
        values = record_get_field_values(rec, "088", code='a')
        for val, rep in product(values, kb):
            if val.startswith(rep):
                collections.add('NOTE')
                break

    # 980 Arxiv tag
    if record_get_field_values(rec,
                               '035',
                               filter_subfield_code="a",
                               filter_subfield_value="arXiv"):
        collections.add("arXiv")

    # 980 HEP && CORE
    collections.add('HEP')
    collections.add('CORE')

    # 980 Conference Note
    if not 'ConferencePaper' in collections:
        for value in record_get_field_values(rec, '962', code='n'):
            if value[-2:].isdigit():
                collections.add('ConferencePaper')
                break

    record_delete_fields(rec, "980")

    intnote = record_get_field_values(rec,
                                      '690',
                                      filter_subfield_code="a",
                                      filter_subfield_value='INTNOTE')
    if intnote:
        val_088 = record_get_field_values(rec, '088', filter_subfield_code="a")
        for val in val_088:
            if 'CMS' in val:
                url = ('http://weblib.cern.ch/abstract?CERN-CMS' +
                       val.split('CMS', 1)[-1])
                record_add_field(rec, '856', ind1='4', subfields=[('u', url)])

    # 041 Language
    languages = get_languages()
    language_fields = record_get_field_instances(rec, '041')
    record_delete_fields(rec, "041")
    for field in language_fields:
        subs = field_get_subfields(field)
        if 'a' in subs:
            if "eng" in subs['a']:
                continue
            new_value = translate_config(subs['a'][0], languages)
            new_subs = [('a', new_value)]
            record_add_field(rec, "041", subfields=new_subs)

    # 035 Externals
    scn_035_fields = record_get_field_instances(rec, '035')
    forbidden_values = [
        "cercer", "inspire", "xx", "cern annual report", "cmscms", "wai01"
    ]
    for field in scn_035_fields:
        subs = field_get_subfields(field)
        if '9' in subs:
            if not 'a' in subs:
                continue
            for sub in subs['9']:
                if sub.lower() in forbidden_values:
                    break
            else:
                # No forbidden values (We did not "break")
                suffixes = [s.lower() for s in subs['9']]
                if 'spires' in suffixes:
                    new_subs = [('a', 'SPIRES-%s' % subs['a'][0])]
                    record_add_field(rec, '970', subfields=new_subs)
                    continue
        if 'a' in subs:
            for sub in subs['a']:
                if sub.lower() in forbidden_values:
                    record_delete_field(rec,
                                        tag="035",
                                        field_position_global=field[4])

    rep_088_fields = record_get_field_instances(rec, '088')
    for field in rep_088_fields:
        subs = field_get_subfields(field)
        if '9' in subs:
            for val in subs['9']:
                if val.startswith('P0') or val.startswith('CM-P0'):
                    sf = [('9', 'CERN'), ('b', val)]
                    record_add_field(rec, '595', subfields=sf)
        for key, val in field[0]:
            if key in ['a', '9'] and not val.startswith('SIS-'):
                record_add_field(rec, '037', subfields=[('a', val)])
    record_delete_fields(rec, "088")

    # 037 Externals also...
    rep_037_fields = record_get_field_instances(rec, '037')
    for field in rep_037_fields:
        subs = field_get_subfields(field)
        if 'a' in subs:
            for value in subs['a']:
                if 'arXiv' in value:
                    new_subs = [('a', value), ('9', 'arXiv')]
                    for fld in record_get_field_instances(rec, '695'):
                        for key, val in field_get_subfield_instances(fld):
                            if key == 'a':
                                new_subs.append(('c', val))
                                break
                    nf = create_field(subfields=new_subs)
                    record_replace_field(rec, '037', nf, field[4])
        for key, val in field[0]:
            if key in ['a', '9'] and val.startswith('SIS-'):
                record_delete_field(rec, '037', field_position_global=field[4])

    for field in record_get_field_instances(rec, '242'):
        record_add_field(rec, '246', subfields=field[0])
    record_delete_fields(rec, '242')

    # 269 Date normalization
    for field in record_get_field_instances(rec, '269'):
        for idx, (key, value) in enumerate(field[0]):
            if key == "c":
                field[0][idx] = ("c", convert_date_to_iso(value))
                record_delete_fields(rec, "260")

    if not 'THESIS' in collections:
        for field in record_get_field_instances(rec, '260'):
            record_add_field(rec, '269', subfields=field[0])
        record_delete_fields(rec, '260')

    # 300 page number
    for field in record_get_field_instances(rec, '300'):
        for idx, (key, value) in enumerate(field[0]):
            if key == 'a':
                if "mult." not in value and value != " p":
                    field[0][idx] = ('a', re.sub(r'[^\d-]+', '', value))
                else:
                    record_delete_field(rec,
                                        '300',
                                        field_position_global=field[4])
                    break

    # 100 & 700 punctuate author names
    author_names = record_get_field_instances(rec, '100')
    author_names.extend(record_get_field_instances(rec, '700'))
    for field in author_names:
        subs = field_get_subfields(field)
        if not 'i' in subs or 'XX' in subs['i']:
            if not 'j' in subs or 'YY' in subs['j']:
                for idx, (key, value) in enumerate(field[0]):
                    if key == 'a':
                        field[0][idx] = ('a', punctuate_authorname(value))

    # 700 -> 701 Thesis supervisors
    if 'THESIS' in collections:
        for field in record_get_field_instances(rec, '700'):
            record_add_field(rec, '701', subfields=field[0])
        record_delete_fields(rec, '700')

    # 501 move subfields
    fields_501 = record_get_field_instances(rec, '502')
    for idx, field in enumerate(fields_501):
        new_subs = []
        for key, value in field[0]:
            if key == 'a':
                new_subs.append(('b', value))
            elif key == 'b':
                new_subs.append(('c', value))
            elif key == 'c':
                new_subs.append(('d', value))
            else:
                new_subs.append((key, value))
        fields_501[idx] = field_swap_subfields(field, new_subs)

    # 650 Translate Categories
    categories = get_categories()
    category_fields = record_get_field_instances(rec,
                                                 '650',
                                                 ind1='1',
                                                 ind2='7')
    record_delete_fields(rec, "650")
    for field in category_fields:
        for idx, (key, value) in enumerate(field[0]):
            if key == 'a':
                new_value = translate_config(value, categories)
                if new_value != value:
                    new_subs = [('2', 'INSPIRE'), ('a', new_value)]
                else:
                    new_subs = [('2', 'SzGeCERN'), ('a', value)]
                record_add_field(rec,
                                 "650",
                                 ind1="1",
                                 ind2="7",
                                 subfields=new_subs)
                break

    # 653 Free Keywords
    for field in record_get_field_instances(rec, '653', ind1='1'):
        subs = field_get_subfields(field)
        new_subs = []
        if 'a' in subs:
            for val in subs['a']:
                new_subs.extend([('9', 'author'), ('a', val)])
        new_field = create_field(subfields=new_subs, ind1='1')
        record_replace_field(rec,
                             '653',
                             new_field,
                             field_position_global=field[4])

    experiments = get_experiments()
    # 693 Remove if 'not applicable'
    for field in record_get_field_instances(rec, '693'):
        subs = field_get_subfields(field)
        all_subs = subs.get('a', []) + subs.get('e', [])
        if 'not applicable' in [x.lower() for x in all_subs]:
            record_delete_field(rec, '693', field_position_global=field[4])
        new_subs = []
        experiment_a = ""
        experiment_e = ""
        for (key, value) in subs.iteritems():
            if key == 'a':
                experiment_a = value[0]
                new_subs.append((key, value[0]))
            elif key == 'e':
                experiment_e = value[0]
        experiment = "%s---%s" % (experiment_a.replace(" ", "-"), experiment_e)
        translated_experiments = translate_config(experiment, experiments)
        new_subs.append(("e", translated_experiments))
        record_delete_field(rec, tag="693", field_position_global=field[4])
        record_add_field(rec, "693", subfields=new_subs)

    # 710 Collaboration
    for field in record_get_field_instances(rec, '710'):
        subs = field_get_subfield_instances(field)
        for idx, (key, value) in enumerate(subs[:]):
            if key == '5':
                subs.pop(idx)
            elif value.startswith('CERN. Geneva'):
                subs.pop(idx)
        if len(subs) == 0:
            record_delete_field(rec, '710', field_position_global=field[4])

    # 773 journal translations
    journals = get_journals()
    for field in record_get_field_instances(rec, '773'):
        subs = field_get_subfield_instances(field)
        new_subs = []
        for idx, (key, value) in enumerate(subs):
            if key == 'p':
                new_subs.append((key, translate_config(value, journals)))
            else:
                new_subs.append((key, value))
        record_delete_field(rec, tag="773", field_position_global=field[4])
        record_add_field(rec, "773", subfields=new_subs)

    # FFT (856) Dealing with graphs
    figure_counter = 0
    for field in record_get_field_instances(rec, '856', ind1='4'):
        subs = field_get_subfields(field)

        newsubs = []
        remove = False

        if 'z' in subs:
            is_figure = [s for s in subs['z'] if "figure" in s.lower()]
            if is_figure and 'u' in subs:
                is_subformat = [
                    s for s in subs['u'] if "subformat" in s.lower()
                ]
                if not is_subformat:
                    url = subs['u'][0]
                    if url.endswith(".pdf"):
                        # We try to convert
                        fd, local_url = mkstemp(suffix=os.path.basename(url),
                                                dir=CFG_TMPSHAREDDIR)
                        os.close(fd)
                        _print("Downloading %s into %s" % (url, local_url),
                               verbose=5)
                        plotfile = ""
                        try:
                            plotfile = download_url(url=url,
                                                    download_to_file=local_url,
                                                    timeout=30.0)
                        except InvenioFileDownloadError:
                            _print(
                                "Download failed while attempting to reach %s. Skipping.."
                                % (url, ))
                            remove = True
                        if plotfile:
                            converted = convert_images([plotfile])
                            if converted:
                                url = converted.pop()
                                _print("Successfully converted %s to %s" %
                                       (local_url, url),
                                       verbose=5)
                            else:
                                _print("Conversion failed on %s" %
                                       (local_url, ))
                                url = None
                                remove = True
                    if url:
                        newsubs.append(('a', url))
                        newsubs.append(('t', 'Plot'))
                        figure_counter += 1
                        if 'y' in subs:
                            newsubs.append(
                                ('d',
                                 "%05d %s" % (figure_counter, subs['y'][0])))
                            newsubs.append(('n', subs['y'][0]))
                        else:
                            # Get basename without extension.
                            name = os.path.basename(
                                os.path.splitext(subs['u'][0])[0])
                            newsubs.append(
                                ('d', "%05d %s" % (figure_counter, name)))
                            newsubs.append(('n', name))

        if not newsubs and 'u' in subs:
            is_fulltext = [s for s in subs['u'] if ".pdf" in s]
            if is_fulltext:
                newsubs = [('t', 'INSPIRE-PUBLIC'), ('a', subs['u'][0])]

        if not newsubs and 'u' in subs:
            remove = True
            is_zipfile = [s for s in subs['u'] if ".zip" in s]
            if is_zipfile:
                url = is_zipfile[0]
                local_url = os.path.join(CFG_TMPSHAREDDIR,
                                         os.path.basename(url))
                _print("Downloading %s into %s" % (url, local_url), verbose=5)
                zipped_archive = ""
                try:
                    zipped_archive = download_url(url=is_zipfile[0],
                                                  download_to_file=local_url,
                                                  timeout=30.0)
                except InvenioFileDownloadError:
                    _print(
                        "Download failed while attempting to reach %s. Skipping.."
                        % (is_zipfile[0], ))
                    remove = True
                if zipped_archive:
                    unzipped_archive = unzip(zipped_archive)
                    list_of_pngs = locate("*.png", unzipped_archive)
                    for png in list_of_pngs:
                        if "_vti_" in png or "__MACOSX" in png:
                            continue
                        figure_counter += 1
                        plotsubs = []
                        plotsubs.append(('a', png))
                        caption = '%05d %s' % (figure_counter,
                                               os.path.basename(png))
                        plotsubs.append(('d', caption))
                        plotsubs.append(('t', 'Plot'))
                        record_add_field(rec, 'FFT', subfields=plotsubs)

        if not remove and not newsubs and 'u' in subs:
            urls = ('http://cdsweb.cern.ch', 'http://cms.cern.ch',
                    'http://cmsdoc.cern.ch', 'http://documents.cern.ch',
                    'http://preprints.cern.ch', 'http://cds.cern.ch')
            for val in subs['u']:
                if any(url in val for url in urls):
                    remove = True
                    break
                if val.endswith('ps.gz'):
                    remove = True

        if newsubs:
            record_add_field(rec, 'FFT', subfields=newsubs)
            remove = True

        if remove:
            record_delete_field(rec,
                                '856',
                                ind1='4',
                                field_position_global=field[4])

    # 500 - Preliminary results
    if "THESIS" not in collections:
        subs = [('a', "Preliminary results")]
        record_add_field(rec, "500", subfields=subs)

    for collection in collections:
        record_add_field(rec, '980', subfields=[('a', collection)])

    return rec
コード例 #4
0
def apply_filter(rec):
    """ Filters the record to be compatible within Inspire
    Parameters:
     * rec - dictionary: BibRecord structure
    Returns: dictionary, BibRecord structure
    """
    # Move recid from 001 to 035 if not hidden
    cds_id = rec['001'][0][3]
    if not 'hidden' in [x.lower() for x in record_get_field_values(rec, "980",
                                                                   code="a")]:
        record_add_field(rec, '035', subfields=[('9', 'CDS'), ('a', cds_id)])
    # Clear control fields
    record_strip_controlfields(rec)

    # Clear other uninteresting fields
    interesting_fields = ["024", "041", "035", "037", "088", "100",
                          "110", "111", "242", "245", "246", "260",
                          "269", "300", "502", "650", "653", "693",
                          "700", "710", "773", "856", "520", "500",
                          "980"]
    for tag in rec.keys():
        if tag not in interesting_fields:
            record_delete_fields(rec, tag)

    # 980 Determine Collections
    collections = set([])
    for value in record_get_field_values(rec, '980', code='a'):
        if 'NOTE' in value.upper():
            collections.add('NOTE')
        if 'THESIS' in value.upper():
            collections.add('THESIS')
        if 'CONFERENCEPAPER' in value.upper():
            collections.add('ConferencePaper')


    if is_published(rec):
        collections.add("PUBLISHED")
        collections.add("CITEABLE")

    if not 'NOTE' in collections:
        # TODO: Move this to a KB
        kb = ['ATLAS-CONF-', 'CMS-PAS-', 'ATL-', 'CMS-DP-',
              'ALICE-INT-', 'LHCb-PUB-']
        values = record_get_field_values(rec, "088", code='a')
        for val, rep in product(values, kb):
            if val.startswith(rep):
                collections.add('NOTE')
                break

    # 980 Arxiv tag
    if record_get_field_values(rec, '035', filter_subfield_code="a",
                               filter_subfield_value="arXiv"):
        collections.add("arXiv")

    # 980 HEP && CORE
    collections.add('HEP')
    collections.add('CORE')

    # 980 Conference Note
    if not 'ConferencePaper' in collections:
        for value in record_get_field_values(rec, '962', code='n'):
            if value[-2:].isdigit():
                collections.add('ConferencePaper')
                break

    record_delete_fields(rec, "980")

    intnote = record_get_field_values(rec, '690', filter_subfield_code="a",
                                      filter_subfield_value='INTNOTE')
    if intnote:
        val_088 = record_get_field_values(rec, '088', filter_subfield_code="a")
        for val in val_088:
            if 'CMS' in val:
                url = ('http://weblib.cern.ch/abstract?CERN-CMS' +
                       val.split('CMS', 1)[-1])
                record_add_field(rec, '856', ind1='4', subfields=[('u', url)])

    # 041 Language
    languages = get_languages()
    language_fields = record_get_field_instances(rec, '041')
    record_delete_fields(rec, "041")
    for field in language_fields:
        subs = field_get_subfields(field)
        if 'a' in subs:
            if "eng" in subs['a']:
                continue
            new_value = translate_config(subs['a'][0], languages)
            new_subs = [('a', new_value)]
            record_add_field(rec, "041", subfields=new_subs)

    # 035 Externals
    scn_035_fields = record_get_field_instances(rec, '035')
    forbidden_values = ["cercer",
                        "inspire",
                        "xx",
                        "cern annual report",
                        "cmscms",
                        "wai01"]
    for field in scn_035_fields:
        subs = field_get_subfields(field)
        if '9' in subs:
            if not 'a' in subs:
                continue
            for sub in subs['9']:
                if sub.lower() in forbidden_values:
                    break
            else:
                # No forbidden values (We did not "break")
                suffixes = [s.lower() for s in subs['9']]
                if 'spires' in suffixes:
                    new_subs = [('a', 'SPIRES-%s' % subs['a'][0])]
                    record_add_field(rec, '970', subfields=new_subs)
                    continue
        if 'a' in subs:
            for sub in subs['a']:
                if sub.lower() in forbidden_values:
                    record_delete_field(rec, tag="035",
                                        field_position_global=field[4])

    rep_088_fields = record_get_field_instances(rec, '088')
    for field in rep_088_fields:
        subs = field_get_subfields(field)
        if '9' in subs:
            for val in subs['9']:
                if val.startswith('P0') or val.startswith('CM-P0'):
                    sf = [('9', 'CERN'), ('b', val)]
                    record_add_field(rec, '595', subfields=sf)
        for key, val in field[0]:
            if key in ['a', '9'] and not val.startswith('SIS-'):
                record_add_field(rec, '037', subfields=[('a', val)])
    record_delete_fields(rec, "088")

    # 037 Externals also...
    rep_037_fields = record_get_field_instances(rec, '037')
    for field in rep_037_fields:
        subs = field_get_subfields(field)
        if 'a' in subs:
            for value in subs['a']:
                if 'arXiv' in value:
                    new_subs = [('a', value), ('9', 'arXiv')]
                    for fld in record_get_field_instances(rec,  '695'):
                        for key, val in field_get_subfield_instances(fld):
                            if key == 'a':
                                new_subs.append(('c', val))
                                break
                    nf = create_field(subfields=new_subs)
                    record_replace_field(rec, '037', nf, field[4])
        for key, val in field[0]:
            if key in ['a', '9'] and val.startswith('SIS-'):
                record_delete_field(rec, '037', field_position_global=field[4])

    for field in record_get_field_instances(rec, '242'):
        record_add_field(rec, '246', subfields=field[0])
    record_delete_fields(rec, '242')

    # 269 Date normalization
    for field in record_get_field_instances(rec, '269'):
        for idx, (key, value) in enumerate(field[0]):
            if key == "c":
                field[0][idx] = ("c", convert_date_to_iso(value))
                record_delete_fields(rec, "260")

    if not 'THESIS' in collections:
        for field in record_get_field_instances(rec, '260'):
            record_add_field(rec, '269', subfields=field[0])
        record_delete_fields(rec, '260')

    # 300 page number
    for field in record_get_field_instances(rec, '300'):
        for idx, (key, value) in enumerate(field[0]):
            if key == 'a':
                if "mult." not in value and value != " p":
                    field[0][idx] = ('a', re.sub(r'[^\d-]+', '', value))
                else:
                    record_delete_field(rec, '300',
                                        field_position_global=field[4])
                    break

    # 100 & 700 punctuate author names
    author_names = record_get_field_instances(rec, '100')
    author_names.extend(record_get_field_instances(rec, '700'))
    for field in author_names:
        subs = field_get_subfields(field)
        if not 'i' in subs or 'XX' in subs['i']:
            if not 'j' in subs or 'YY' in subs['j']:
                for idx, (key, value) in enumerate(field[0]):
                    if key == 'a':
                        field[0][idx] = ('a', punctuate_authorname(value))

    # 700 -> 701 Thesis supervisors
    if 'THESIS' in collections:
        for field in record_get_field_instances(rec, '700'):
            record_add_field(rec, '701', subfields=field[0])
        record_delete_fields(rec, '700')

    # 501 move subfields
    fields_501 = record_get_field_instances(rec, '502')
    for idx, field in enumerate(fields_501):
        new_subs = []
        for key, value in field[0]:
            if key == 'a':
                new_subs.append(('b', value))
            elif key == 'b':
                new_subs.append(('c', value))
            elif key == 'c':
                new_subs.append(('d', value))
            else:
                new_subs.append((key, value))
        fields_501[idx] = field_swap_subfields(field, new_subs)

    # 650 Translate Categories
    categories = get_categories()
    category_fields = record_get_field_instances(rec, '650', ind1='1', ind2='7')
    record_delete_fields(rec, "650")
    for field in category_fields:
        for idx, (key, value) in enumerate(field[0]):
            if key == 'a':
                new_value = translate_config(value, categories)
                if new_value != value:
                    new_subs = [('2', 'INSPIRE'), ('a', new_value)]
                else:
                    new_subs = [('2', 'SzGeCERN'), ('a', value)]
                record_add_field(rec, "650", ind1="1", ind2="7",
                                 subfields=new_subs)
                break

    # 653 Free Keywords
    for field in record_get_field_instances(rec, '653', ind1='1'):
        subs = field_get_subfields(field)
        new_subs = []
        if 'a' in subs:
            for val in subs['a']:
                new_subs.extend([('9', 'author'), ('a', val)])
        new_field = create_field(subfields=new_subs, ind1='1')
        record_replace_field(rec, '653', new_field, field_position_global=field[4])

    experiments = get_experiments()
    # 693 Remove if 'not applicable'
    for field in record_get_field_instances(rec, '693'):
        subs = field_get_subfields(field)
        all_subs = subs.get('a', []) + subs.get('e', [])
        if 'not applicable' in [x.lower() for x in all_subs]:
            record_delete_field(rec, '693',
                                field_position_global=field[4])
        new_subs = []
        experiment_a = ""
        experiment_e = ""
        for (key, value) in subs.iteritems():
            if key == 'a':
                experiment_a = value[0]
                new_subs.append((key, value[0]))
            elif key == 'e':
                experiment_e = value[0]
        experiment = "%s---%s" % (experiment_a.replace(" ", "-"),
                                  experiment_e)
        translated_experiments = translate_config(experiment,
                                                  experiments)
        new_subs.append(("e", translated_experiments))
        record_delete_field(rec, tag="693",
                            field_position_global=field[4])
        record_add_field(rec, "693", subfields=new_subs)

    # 710 Collaboration
    for field in record_get_field_instances(rec, '710'):
        subs = field_get_subfield_instances(field)
        for idx, (key, value) in enumerate(subs[:]):
            if key == '5':
                subs.pop(idx)
            elif value.startswith('CERN. Geneva'):
                subs.pop(idx)
        if len(subs) == 0:
            record_delete_field(rec, '710', field_position_global=field[4])

    # 773 journal translations
    journals = get_journals()
    for field in record_get_field_instances(rec, '773'):
        subs = field_get_subfield_instances(field)
        new_subs = []
        for idx, (key, value) in enumerate(subs):
            if key == 'p':
                new_subs.append((key, translate_config(value, journals)))
            else:
                new_subs.append((key, value))
        record_delete_field(rec, tag="773",
                            field_position_global=field[4])
        record_add_field(rec, "773", subfields=new_subs)

    # FFT (856) Dealing with graphs
    figure_counter = 0
    for field in record_get_field_instances(rec, '856', ind1='4'):
        subs = field_get_subfields(field)

        newsubs = []
        remove = False

        if 'z' in subs:
            is_figure = [s for s in subs['z'] if "figure" in s.lower()]
            if is_figure and 'u' in subs:
                is_subformat = [s for s in subs['u'] if "subformat" in s.lower()]
                if not is_subformat:
                    url = subs['u'][0]
                    if url.endswith(".pdf"):
                        # We try to convert
                        fd, local_url = mkstemp(suffix=os.path.basename(url), dir=CFG_TMPSHAREDDIR)
                        os.close(fd)
                        _print("Downloading %s into %s" % (url, local_url), verbose=5)
                        plotfile = ""
                        try:
                            plotfile = download_url(url=url,
                                                    download_to_file=local_url,
                                                    timeout=30.0)
                        except InvenioFileDownloadError:
                            _print("Download failed while attempting to reach %s. Skipping.." % (url,))
                            remove = True
                        if plotfile:
                            converted = convert_images([plotfile])
                            if converted:
                                url = converted.pop()
                                _print("Successfully converted %s to %s" % (local_url, url), verbose=5)
                            else:
                                _print("Conversion failed on %s" % (local_url,))
                                url = None
                                remove = True
                    if url:
                        newsubs.append(('a', url))
                        newsubs.append(('t', 'Plot'))
                        figure_counter += 1
                        if 'y' in subs:
                            newsubs.append(('d', "%05d %s" % (figure_counter, subs['y'][0])))
                            newsubs.append(('n', subs['y'][0]))
                        else:
                            # Get basename without extension.
                            name = os.path.basename(os.path.splitext(subs['u'][0])[0])
                            newsubs.append(('d', "%05d %s" % (figure_counter, name)))
                            newsubs.append(('n', name))

        if not newsubs and 'u' in subs:
            is_fulltext = [s for s in subs['u'] if ".pdf" in s and not "subformat=pdfa" in s]
            if is_fulltext:
                newsubs = [('t', 'INSPIRE-PUBLIC'), ('a', subs['u'][0])]

        if not newsubs and 'u' in subs:
            remove = True
            is_zipfile = [s for s in subs['u'] if ".zip" in s]
            if is_zipfile:
                url = is_zipfile[0]
                local_url = os.path.join(CFG_TMPSHAREDDIR, os.path.basename(url))
                _print("Downloading %s into %s" % (url, local_url), verbose=5)
                zipped_archive = ""
                try:
                    zipped_archive = download_url(url=is_zipfile[0],
                                                  download_to_file=local_url,
                                                  timeout=30.0)
                except InvenioFileDownloadError:
                    _print("Download failed while attempting to reach %s. Skipping.."
                           % (is_zipfile[0],))
                    remove = True
                if zipped_archive:
                    unzipped_archive = unzip(zipped_archive)
                    list_of_pngs = locate("*.png", unzipped_archive)
                    for png in list_of_pngs:
                        if "_vti_" in png or "__MACOSX" in png:
                            continue
                        figure_counter += 1
                        plotsubs = []
                        plotsubs.append(('a', png))
                        caption = '%05d %s' % (figure_counter, os.path.basename(png))
                        plotsubs.append(('d', caption))
                        plotsubs.append(('t', 'Plot'))
                        record_add_field(rec, 'FFT', subfields=plotsubs)

        if not remove and not newsubs and 'u' in subs:
            urls = ('http://cdsweb.cern.ch', 'http://cms.cern.ch',
                    'http://cmsdoc.cern.ch', 'http://documents.cern.ch',
                    'http://preprints.cern.ch', 'http://cds.cern.ch')
            for val in subs['u']:
                if any(url in val for url in urls):
                    remove = True
                    break
                if val.endswith('ps.gz'):
                    remove = True

        if newsubs:
            record_add_field(rec, 'FFT', subfields=newsubs)
            remove = True

        if remove:
            record_delete_field(rec, '856', ind1='4',
                                field_position_global=field[4])

    # 500 - Preliminary results
    if "THESIS" not in collections:
        subs = [('a', "Preliminary results")]
        record_add_field(rec, "500", subfields=subs)

    for collection in collections:
        record_add_field(rec, '980', subfields=[('a', collection)])

    return rec
コード例 #5
0
def apply_filter(rec):
    """ Filters the record to be compatible within Inspire
    Parameters:
     * rec - dictionary: BibRecord structure
    Returns: dictionary, BibRecord structure
    """
    # Move recid from 001 to 035 if not hidden
    cds_id = rec["001"][0][3]
    if not "hidden" in [x.lower() for x in record_get_field_values(rec, "980", code="a")]:
        record_add_field(rec, "035", subfields=[("9", "CDS"), ("a", cds_id)])
    # Clear control fields
    record_strip_controlfields(rec)

    # Clear other uninteresting fields
    interesting_fields = [
        "024",
        "041",
        "035",
        "037",
        "088",
        "100",
        "110",
        "111",
        "242",
        "245",
        "246",
        "260",
        "269",
        "300",
        "502",
        "650",
        "653",
        "693",
        "700",
        "710",
        "773",
        "856",
        "520",
        "500",
        "980",
    ]
    for tag in rec.keys():
        if tag not in interesting_fields:
            record_delete_fields(rec, tag)

    # 980 Determine Collections
    collections = set([])
    for value in record_get_field_values(rec, "980", code="a"):
        if "NOTE" in value.upper():
            collections.add("NOTE")
        if "THESIS" in value.upper():
            collections.add("THESIS")
        if "CONFERENCEPAPER" in value.upper():
            collections.add("ConferencePaper")

    if is_published(rec):
        collections.add("PUBLISHED")
        collections.add("CITEABLE")

    if not "NOTE" in collections:
        # TODO: Move this to a KB
        kb = ["ATLAS-CONF-", "CMS-PAS-", "ATL-", "CMS-DP-", "ALICE-INT-", "LHCb-PUB-"]
        values = record_get_field_values(rec, "088", code="a")
        for val, rep in product(values, kb):
            if val.startswith(rep):
                collections.add("NOTE")
                break

    # 980 Arxiv tag
    if record_get_field_values(rec, "035", filter_subfield_code="a", filter_subfield_value="arXiv"):
        collections.add("arXiv")

    # 980 HEP && CORE
    collections.add("HEP")
    collections.add("CORE")

    # 980 Conference Note
    if not "ConferencePaper" in collections:
        for value in record_get_field_values(rec, "962", code="n"):
            if value[-2:].isdigit():
                collections.add("ConferencePaper")
                break

    record_delete_fields(rec, "980")

    intnote = record_get_field_values(rec, "690", filter_subfield_code="a", filter_subfield_value="INTNOTE")
    if intnote:
        val_088 = record_get_field_values(rec, "088", filter_subfield_code="a")
        for val in val_088:
            if "CMS" in val:
                url = "http://weblib.cern.ch/abstract?CERN-CMS" + val.split("CMS", 1)[-1]
                record_add_field(rec, "856", ind1="4", subfields=[("u", url)])

    # 041 Language
    languages = get_languages()
    language_fields = record_get_field_instances(rec, "041")
    record_delete_fields(rec, "041")
    for field in language_fields:
        subs = field_get_subfields(field)
        if "a" in subs:
            if "eng" in subs["a"]:
                continue
            new_value = translate_config(subs["a"][0], languages)
            new_subs = [("a", new_value)]
            record_add_field(rec, "041", subfields=new_subs)

    # 035 Externals
    scn_035_fields = record_get_field_instances(rec, "035")
    forbidden_values = ["cercer", "inspire", "xx", "cern annual report", "cmscms", "wai01"]
    for field in scn_035_fields:
        subs = field_get_subfields(field)
        if "9" in subs:
            if not "a" in subs:
                continue
            for sub in subs["9"]:
                if sub.lower() in forbidden_values:
                    break
            else:
                # No forbidden values (We did not "break")
                suffixes = [s.lower() for s in subs["9"]]
                if "spires" in suffixes:
                    new_subs = [("a", "SPIRES-%s" % subs["a"][0])]
                    record_add_field(rec, "970", subfields=new_subs)
                    continue
        if "a" in subs:
            for sub in subs["a"]:
                if sub.lower() in forbidden_values:
                    record_delete_field(rec, tag="035", field_position_global=field[4])

    rep_088_fields = record_get_field_instances(rec, "088")
    for field in rep_088_fields:
        subs = field_get_subfields(field)
        if "9" in subs:
            for val in subs["9"]:
                if val.startswith("P0") or val.startswith("CM-P0"):
                    sf = [("9", "CERN"), ("b", val)]
                    record_add_field(rec, "595", subfields=sf)
        for key, val in field[0]:
            if key in ["a", "9"] and not val.startswith("SIS-"):
                record_add_field(rec, "037", subfields=[("a", val)])
    record_delete_fields(rec, "088")

    # 037 Externals also...
    rep_037_fields = record_get_field_instances(rec, "037")
    for field in rep_037_fields:
        subs = field_get_subfields(field)
        if "a" in subs:
            for value in subs["a"]:
                if "arXiv" in value:
                    new_subs = [("a", value), ("9", "arXiv")]
                    for fld in record_get_field_instances(rec, "695"):
                        for key, val in field_get_subfield_instances(fld):
                            if key == "a":
                                new_subs.append(("c", val))
                                break
                    nf = create_field(subfields=new_subs)
                    record_replace_field(rec, "037", nf, field[4])
        for key, val in field[0]:
            if key in ["a", "9"] and val.startswith("SIS-"):
                record_delete_field(rec, "037", field_position_global=field[4])

    for field in record_get_field_instances(rec, "242"):
        record_add_field(rec, "246", subfields=field[0])
    record_delete_fields(rec, "242")

    # 269 Date normalization
    for field in record_get_field_instances(rec, "269"):
        for idx, (key, value) in enumerate(field[0]):
            if key == "c":
                field[0][idx] = ("c", convert_date_to_iso(value))
                record_delete_fields(rec, "260")

    if not "THESIS" in collections:
        for field in record_get_field_instances(rec, "260"):
            record_add_field(rec, "269", subfields=field[0])
        record_delete_fields(rec, "260")

    # 300 page number
    for field in record_get_field_instances(rec, "300"):
        for idx, (key, value) in enumerate(field[0]):
            if key == "a":
                if "mult." not in value and value != " p":
                    field[0][idx] = ("a", re.sub(r"[^\d-]+", "", value))
                else:
                    record_delete_field(rec, "300", field_position_global=field[4])
                    break

    # 100 & 700 punctuate author names
    author_names = record_get_field_instances(rec, "100")
    author_names.extend(record_get_field_instances(rec, "700"))
    for field in author_names:
        subs = field_get_subfields(field)
        if not "i" in subs or "XX" in subs["i"]:
            if not "j" in subs or "YY" in subs["j"]:
                for idx, (key, value) in enumerate(field[0]):
                    if key == "a":
                        field[0][idx] = ("a", punctuate_authorname(value))

    # 700 -> 701 Thesis supervisors
    if "THESIS" in collections:
        for field in record_get_field_instances(rec, "700"):
            record_add_field(rec, "701", subfields=field[0])
        record_delete_fields(rec, "700")

    # 501 move subfields
    fields_501 = record_get_field_instances(rec, "502")
    for idx, field in enumerate(fields_501):
        new_subs = []
        for key, value in field[0]:
            if key == "a":
                new_subs.append(("b", value))
            elif key == "b":
                new_subs.append(("c", value))
            elif key == "c":
                new_subs.append(("d", value))
            else:
                new_subs.append((key, value))
        fields_501[idx] = field_swap_subfields(field, new_subs)

    # 650 Translate Categories
    categories = get_categories()
    category_fields = record_get_field_instances(rec, "650", ind1="1", ind2="7")
    record_delete_fields(rec, "650")
    for field in category_fields:
        for idx, (key, value) in enumerate(field[0]):
            if key == "a":
                new_value = translate_config(value, categories)
                if new_value != value:
                    new_subs = [("2", "INSPIRE"), ("a", new_value)]
                else:
                    new_subs = [("2", "SzGeCERN"), ("a", value)]
                record_add_field(rec, "650", ind1="1", ind2="7", subfields=new_subs)
                break

    # 653 Free Keywords
    for field in record_get_field_instances(rec, "653", ind1="1"):
        subs = field_get_subfields(field)
        new_subs = []
        if "a" in subs:
            for val in subs["a"]:
                new_subs.extend([("9", "author"), ("a", val)])
        new_field = create_field(subfields=new_subs, ind1="1")
        record_replace_field(rec, "653", new_field, field_position_global=field[4])

    experiments = get_experiments()
    # 693 Remove if 'not applicable'
    for field in record_get_field_instances(rec, "693"):
        subs = field_get_subfields(field)
        all_subs = subs.get("a", []) + subs.get("e", [])
        if "not applicable" in [x.lower() for x in all_subs]:
            record_delete_field(rec, "693", field_position_global=field[4])
        new_subs = []
        experiment_a = ""
        experiment_e = ""
        for (key, value) in subs.iteritems():
            if key == "a":
                experiment_a = value[0]
                new_subs.append((key, value[0]))
            elif key == "e":
                experiment_e = value[0]
        experiment = "%s---%s" % (experiment_a.replace(" ", "-"), experiment_e)
        translated_experiments = translate_config(experiment, experiments)
        new_subs.append(("e", translated_experiments))
        record_delete_field(rec, tag="693", field_position_global=field[4])
        record_add_field(rec, "693", subfields=new_subs)

    # 710 Collaboration
    for field in record_get_field_instances(rec, "710"):
        subs = field_get_subfield_instances(field)
        for idx, (key, value) in enumerate(subs[:]):
            if key == "5":
                subs.pop(idx)
            elif value.startswith("CERN. Geneva"):
                subs.pop(idx)
        if len(subs) == 0:
            record_delete_field(rec, "710", field_position_global=field[4])

    # 773 journal translations
    journals = get_journals()
    for field in record_get_field_instances(rec, "773"):
        subs = field_get_subfield_instances(field)
        new_subs = []
        for idx, (key, value) in enumerate(subs):
            if key == "p":
                new_subs.append((key, translate_config(value, journals)))
            else:
                new_subs.append((key, value))
        record_delete_field(rec, tag="773", field_position_global=field[4])
        record_add_field(rec, "773", subfields=new_subs)

    # FFT (856) Dealing with graphs
    figure_counter = 0
    for field in record_get_field_instances(rec, "856", ind1="4"):
        subs = field_get_subfields(field)

        newsubs = []
        remove = False

        if "z" in subs:
            is_figure = [s for s in subs["z"] if "figure" in s.lower()]
            if is_figure and "u" in subs:
                is_subformat = [s for s in subs["u"] if "subformat" in s.lower()]
                if not is_subformat:
                    url = subs["u"][0]
                    if url.endswith(".pdf"):
                        # We try to convert
                        fd, local_url = mkstemp(suffix=os.path.basename(url), dir=CFG_TMPSHAREDDIR)
                        os.close(fd)
                        _print("Downloading %s into %s" % (url, local_url), verbose=5)
                        plotfile = ""
                        try:
                            plotfile = download_url(url=url, download_to_file=local_url, timeout=30.0)
                        except InvenioFileDownloadError:
                            _print("Download failed while attempting to reach %s. Skipping.." % (url,))
                            remove = True
                        if plotfile:
                            converted = convert_images([plotfile])
                            if converted:
                                url = converted.pop()
                                _print("Successfully converted %s to %s" % (local_url, url), verbose=5)
                            else:
                                _print("Conversion failed on %s" % (local_url,))
                                url = None
                                remove = True
                    if url:
                        newsubs.append(("a", url))
                        newsubs.append(("t", "Plot"))
                        figure_counter += 1
                        if "y" in subs:
                            newsubs.append(("d", "%05d %s" % (figure_counter, subs["y"][0])))
                            newsubs.append(("n", subs["y"][0]))
                        else:
                            # Get basename without extension.
                            name = os.path.basename(os.path.splitext(subs["u"][0])[0])
                            newsubs.append(("d", "%05d %s" % (figure_counter, name)))
                            newsubs.append(("n", name))

        if not newsubs and "u" in subs:
            is_fulltext = [s for s in subs["u"] if ".pdf" in s]
            if is_fulltext:
                newsubs = [("t", "INSPIRE-PUBLIC"), ("a", subs["u"][0])]

        if not newsubs and "u" in subs:
            remove = True
            is_zipfile = [s for s in subs["u"] if ".zip" in s]
            if is_zipfile:
                url = is_zipfile[0]
                local_url = os.path.join(CFG_TMPSHAREDDIR, os.path.basename(url))
                _print("Downloading %s into %s" % (url, local_url), verbose=5)
                zipped_archive = ""
                try:
                    zipped_archive = download_url(url=is_zipfile[0], download_to_file=local_url, timeout=30.0)
                except InvenioFileDownloadError:
                    _print("Download failed while attempting to reach %s. Skipping.." % (is_zipfile[0],))
                    remove = True
                if zipped_archive:
                    unzipped_archive = unzip(zipped_archive)
                    list_of_pngs = locate("*.png", unzipped_archive)
                    for png in list_of_pngs:
                        if "_vti_" in png or "__MACOSX" in png:
                            continue
                        figure_counter += 1
                        plotsubs = []
                        plotsubs.append(("a", png))
                        caption = "%05d %s" % (figure_counter, os.path.basename(png))
                        plotsubs.append(("d", caption))
                        plotsubs.append(("t", "Plot"))
                        record_add_field(rec, "FFT", subfields=plotsubs)

        if not remove and not newsubs and "u" in subs:
            urls = (
                "http://cdsweb.cern.ch",
                "http://cms.cern.ch",
                "http://cmsdoc.cern.ch",
                "http://documents.cern.ch",
                "http://preprints.cern.ch",
                "http://cds.cern.ch",
            )
            for val in subs["u"]:
                if any(url in val for url in urls):
                    remove = True
                    break
                if val.endswith("ps.gz"):
                    remove = True

        if newsubs:
            record_add_field(rec, "FFT", subfields=newsubs)
            remove = True

        if remove:
            record_delete_field(rec, "856", ind1="4", field_position_global=field[4])

    # 500 - Preliminary results
    if "THESIS" not in collections:
        subs = [("a", "Preliminary results")]
        record_add_field(rec, "500", subfields=subs)

    for collection in collections:
        record_add_field(rec, "980", subfields=[("a", collection)])

    return rec
コード例 #6
0
def perform_request_update_record(request_type, recid, uid, cacheMTime, data, changeApplied, isBulk=False):
    """Handle record update requests like adding, modifying, moving or deleting
    of fields or subfields. Possible common error situations:
    - Missing cache file
    - Cache file modified in other editor
    """

    response = {}

    if not cache_exists(recid, uid):
        response['resultCode'] = 106
    elif not get_cache_mtime(recid, uid) == cacheMTime and isBulk == False:
        # In case of a bulk request, the changes are deliberately performed imemdiately one after another
        response['resultCode'] = 107
    else:
        try:
            record_revision, record, pending_changes, desactivated_hp_changes = get_cache_file_contents(recid, uid)[1:]
        except:
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['wrong_cache_file_format']
            return response;

        if changeApplied != -1:
            pending_changes = pending_changes[:changeApplied] + pending_changes[changeApplied+1:]

        field_position_local = data.get('fieldPosition')
        if field_position_local is not None:
            field_position_local = int(field_position_local)
        if request_type == 'overrideChangesList':
            pending_changes = data['newChanges']
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['editor_modifications_changed']
        elif request_type == 'removeChange':
            #the change is removed automatically by passing the changeApplied parameter
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['editor_modifications_changed']
        elif request_type == 'desactivateHoldingPenChangeset':
            # the changeset has been marked as processed ( user applied it in the editor)
            # marking as used in the cache file
            # CAUTION: This function has been implemented here because logically it fits
            #          with the modifications made to the cache file. No changes are made to the
            #          Holding Pen physically. The changesets are related to the cache because
            #          we want to cancel the removal every time the cache disappears for any reason
            desactivated_hp_changes[data.get('desactivatedChangeset')] = True;
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['disabled_hp_changeset']
        elif request_type == 'addField':
            if data['controlfield']:
                record_add_field(record, data['tag'],
                                 controlfield_value=data['value'])
                response['resultCode'] = 20
            else:
                record_add_field(record, data['tag'], data['ind1'],
                                 data['ind2'], subfields=data['subfields'],
                                 field_position_local=field_position_local)
                response['resultCode'] = 21

        elif request_type == 'addSubfields':
            subfields = data['subfields']
            for subfield in subfields:
                record_add_subfield_into(record, data['tag'], subfield[0],
                    subfield[1], subfield_position=None,
                    field_position_local=field_position_local)
            if len(subfields) == 1:
                response['resultCode'] = 22
            else:
                response['resultCode'] = 23
        elif request_type == 'modifyField': # changing the field structure
            # first remove subfields and then add new... change the indices
            subfields = data['subFields'] # parse the JSON representation of the subfields here

            new_field = create_field(subfields, data['ind1'], data['ind2']);
            record_replace_field(record, data['tag'], new_field, field_position_local = data['fieldPosition'])
            response['resultCode'] = 26
            #response['debuggingValue'] = data['subFields'];

        elif request_type == 'modifyContent':
            if data['subfieldIndex'] != None:
                record_modify_subfield(record, data['tag'],
                    data['subfieldCode'], data['value'],
                    int(data['subfieldIndex']),
                    field_position_local=field_position_local)
            else:
                record_modify_controlfield(record, data['tag'], data["value"],
                  field_position_local=field_position_local)
            response['resultCode'] = 24

        elif request_type == 'moveSubfield':
            record_move_subfield(record, data['tag'],
                int(data['subfieldIndex']), int(data['newSubfieldIndex']),
                field_position_local=field_position_local)
            response['resultCode'] = 25

        elif request_type == 'moveField':
            if data['direction'] == 'up':
                final_position_local = field_position_local-1
            else: # direction is 'down'
                final_position_local = field_position_local+1
            record_move_fields(record, data['tag'], [field_position_local],
                final_position_local)
            response['resultCode'] = 32

        elif request_type == 'deleteFields':
            to_delete = data['toDelete']
            deleted_fields = 0
            deleted_subfields = 0
            for tag in to_delete:
                # Sorting the fields in a edcreasing order by the local position !
                fieldsOrder = to_delete[tag].keys()
                fieldsOrder.sort(lambda a,b: int(b)-int(a))
                for field_position_local in fieldsOrder:
                    if not to_delete[tag][field_position_local]:
                        # No subfields specified - delete entire field.
                        record_delete_field(record, tag,
                            field_position_local=int(field_position_local))
                        deleted_fields += 1
                    else:
                        for subfield_position in \
                                to_delete[tag][field_position_local][::-1]:
                            # Delete subfields in reverse order (to keep the
                            # indexing correct).
                            record_delete_subfield_from(record, tag,
                                int(subfield_position),
                                field_position_local=int(field_position_local))
                            deleted_subfields += 1
            if deleted_fields == 1 and deleted_subfields == 0:
                response['resultCode'] = 26
            elif deleted_fields and deleted_subfields == 0:
                response['resultCode'] = 27
            elif deleted_subfields == 1 and deleted_fields == 0:
                response['resultCode'] = 28
            elif deleted_subfields and deleted_fields == 0:
                response['resultCode'] = 29
            else:
                response['resultCode'] = 30
        response['cacheMTime'], response['cacheDirty'] = \
            update_cache_file_contents(recid, uid, record_revision, record, \
                                       pending_changes, desactivated_hp_changes), \
            True

    return response
コード例 #7
0
def perform_request_update_record(request_type, recid, uid, cacheMTime, data, \
                                  hpChanges, undoRedoOp, isBulk=False):
    """Handle record update requests like adding, modifying, moving or deleting
    of fields or subfields. Possible common error situations:
    - Missing cache file
    - Cache file modified in other editor
    Explanation of some parameters:
       undoRedoOp - Indicates in "undo"/"redo"/undo_descriptor operation is
                    performed by a current request.
    """

    response = {}

    if not cache_exists(recid, uid):
        response['resultCode'] = 106
    elif not get_cache_mtime(recid, uid) == cacheMTime and isBulk == False:
        # In case of a bulk request, the changes are deliberately performed
        # imemdiately one after another
        response['resultCode'] = 107
    else:
        try:
            record_revision, record, pending_changes, deactivated_hp_changes, \
                undo_list, redo_list = get_cache_file_contents(recid, uid)[1:]
        except:
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \
                'wrong_cache_file_format']
            return response

        # process all the Holding Pen changes operations ... regardles the
        # request type
#        import rpdb2;
#        rpdb2.start_embedded_debugger('password', fAllowRemote=True)
        if hpChanges.has_key("toDisable"):
            for changeId in hpChanges["toDisable"]:
                pending_changes[changeId]["applied_change"] = True

        if hpChanges.has_key("toEnable"):
            for changeId in hpChanges["toEnable"]:
                pending_changes[changeId]["applied_change"] = False

        if hpChanges.has_key("toOverride"):
            pending_changes = hpChanges["toOverride"]

        if hpChanges.has_key("changesetsToDeactivate"):
            for changesetId in hpChanges["changesetsToDeactivate"]:
                deactivated_hp_changes[changesetId] = True

        if hpChanges.has_key("changesetsToActivate"):
            for changesetId in hpChanges["changesetsToActivate"]:
                deactivated_hp_changes[changesetId] = False

        # processing the undo/redo entries
        if undoRedoOp == "undo":
            try:
                redo_list = [undo_list[-1]] + redo_list
                undo_list = undo_list[:-1]
            except:
                raise Exception("An exception occured when undoing previous" + \
                                " operation. Undo list: " + str(undo_list) + \
                                " Redo list " + str(redo_list))
        elif undoRedoOp == "redo":
            try:
                undo_list = undo_list + [redo_list[0]]
                redo_list = redo_list[1:]
            except:
                raise Exception("An exception occured when redoing previous" + \
                                " operation. Undo list: " + str(undo_list) + \
                                " Redo list " + str(redo_list))
        else:
            # This is a genuine operation - we have to add a new descriptor
            # to the undo list and cancel the redo unless the operation is
            # a bulk operation
            if undoRedoOp != None:
                undo_list = undo_list + [undoRedoOp]
                redo_list = []
            else:
                assert isBulk == True

        field_position_local = data.get('fieldPosition')
        if field_position_local is not None:
            field_position_local = int(field_position_local)
        if request_type == 'otherUpdateRequest':
            # An empty request. Might be useful if we want to perform
            # operations that require only the actions performed globally,
            # like modifying the holdingPen changes list
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \
                'editor_modifications_changed']
        elif request_type == 'deactivateHoldingPenChangeset':
            # the changeset has been marked as processed ( user applied it in
            # the editor). Marking as used in the cache file.
            # CAUTION: This function has been implemented here because logically
            #          it fits with the modifications made to the cache file.
            #          No changes are made to the Holding Pen physically. The
            #          changesets are related to the cache because we want to
            #          cancel the removal every time the cache disappears for
            #          any reason
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \
                'disabled_hp_changeset']
        elif request_type == 'addField':
            if data['controlfield']:
                record_add_field(record, data['tag'],
                                 controlfield_value=data['value'])
                response['resultCode'] = 20
            else:
                record_add_field(record, data['tag'], data['ind1'],
                                 data['ind2'], subfields=data['subfields'],
                                 field_position_local=field_position_local)
                response['resultCode'] = 21

        elif request_type == 'addSubfields':
            subfields = data['subfields']
            for subfield in subfields:
                record_add_subfield_into(record, data['tag'], subfield[0],
                    subfield[1], subfield_position=None,
                    field_position_local=field_position_local)
            if len(subfields) == 1:
                response['resultCode'] = 22
            else:
                response['resultCode'] = 23
        elif request_type == 'addFieldsSubfieldsOnPositions':
            #1) Sorting the fields by their identifiers
            fieldsToAdd = data['fieldsToAdd']
            subfieldsToAdd = data['subfieldsToAdd']
            for tag in fieldsToAdd.keys():
                positions = fieldsToAdd[tag].keys()
                positions.sort()
                for position in positions:
                    # now adding fields at a position

                    isControlfield = (len(fieldsToAdd[tag][position][0]) == 0)
                    # if there are n subfields, this is a control field
                    if isControlfield:
                        controlfieldValue = fieldsToAdd[tag][position][3]
                        record_add_field(record, tag, field_position_local = \
                                             int(position), \
                                             controlfield_value = \
                                                 controlfieldValue)
                    else:
                        subfields = fieldsToAdd[tag][position][0]
                        ind1 = fieldsToAdd[tag][position][1]
                        ind2 = fieldsToAdd[tag][position][2]
                        record_add_field(record, tag, ind1, ind2, subfields = \
                                             subfields, field_position_local = \
                                                int(position))
            # now adding the subfields
            for tag in subfieldsToAdd.keys():
                for fieldPosition in subfieldsToAdd[tag].keys(): #now the fields
                                                          #order not important !
                    subfieldsPositions = subfieldsToAdd[tag][fieldPosition]. \
                                           keys()
                    subfieldsPositions.sort()
                    for subfieldPosition in subfieldsPositions:
                        subfield = subfieldsToAdd[tag][fieldPosition]\
                            [subfieldPosition]
                        record_add_subfield_into(record, tag, subfield[0], \
                                                 subfield[1], \
                                                 subfield_position = \
                                                     int(subfieldPosition), \
                                                 field_position_local = \
                                                     int(fieldPosition))

            response['resultCode'] = \
                CFG_BIBEDIT_AJAX_RESULT_CODES_REV['added_positioned_subfields']

        elif request_type == 'modifyField': # changing the field structure
            # first remove subfields and then add new... change the indices
            subfields = data['subFields'] # parse the JSON representation of
                                          # the subfields here

            new_field = create_field(subfields, data['ind1'], data['ind2'])
            record_replace_field(record, data['tag'], new_field, \
                field_position_local = data['fieldPosition'])
            response['resultCode'] = 26

        elif request_type == 'modifyContent':
            if data['subfieldIndex'] != None:
                record_modify_subfield(record, data['tag'],
                    data['subfieldCode'], data['value'],
                    int(data['subfieldIndex']),
                    field_position_local=field_position_local)
            else:
                record_modify_controlfield(record, data['tag'], data["value"],
                  field_position_local=field_position_local)
            response['resultCode'] = 24

        elif request_type == 'moveSubfield':
            record_move_subfield(record, data['tag'],
                int(data['subfieldIndex']), int(data['newSubfieldIndex']),
                field_position_local=field_position_local)
            response['resultCode'] = 25

        elif request_type == 'moveField':
            if data['direction'] == 'up':
                final_position_local = field_position_local-1
            else: # direction is 'down'
                final_position_local = field_position_local+1
            record_move_fields(record, data['tag'], [field_position_local],
                final_position_local)
            response['resultCode'] = 32

        elif request_type == 'deleteFields':
            to_delete = data['toDelete']
            deleted_fields = 0
            deleted_subfields = 0
            for tag in to_delete:
                #Sorting the fields in a edcreasing order by the local position!
                fieldsOrder = to_delete[tag].keys()
                fieldsOrder.sort(lambda a, b: int(b) - int(a))
                for field_position_local in fieldsOrder:
                    if not to_delete[tag][field_position_local]:
                        # No subfields specified - delete entire field.
                        record_delete_field(record, tag,
                            field_position_local=int(field_position_local))
                        deleted_fields += 1
                    else:
                        for subfield_position in \
                                to_delete[tag][field_position_local][::-1]:
                            # Delete subfields in reverse order (to keep the
                            # indexing correct).
                            record_delete_subfield_from(record, tag,
                                int(subfield_position),
                                field_position_local=int(field_position_local))
                            deleted_subfields += 1
            if deleted_fields == 1 and deleted_subfields == 0:
                response['resultCode'] = 26
            elif deleted_fields and deleted_subfields == 0:
                response['resultCode'] = 27
            elif deleted_subfields == 1 and deleted_fields == 0:
                response['resultCode'] = 28
            elif deleted_subfields and deleted_fields == 0:
                response['resultCode'] = 29
            else:
                response['resultCode'] = 30
        response['cacheMTime'], response['cacheDirty'] = \
            update_cache_file_contents(recid, uid, record_revision, record, \
                                       pending_changes, \
                                       deactivated_hp_changes, \
                                       undo_list, redo_list), \
            True

    return response