Exemple #1
0
def show_papers(personid, external_id=None, orcid=None, inspire=None):
    """
    List the papers of a person whose author field carries a given ID.

    Every row of aidPERSONIDPAPERS attached to the person (flag > -2) is
    inspected.  In the corresponding record the author field whose name
    subfield (100__a / 700__a) equals the name stored for the bibref is
    located, and the a/i/j/k/m subfields of that field are compared
    against the given identifiers.

    @param personid: id of the person in aidPERSONIDPAPERS
    @param external_id: identifier to look for; a subfield matches when
        its value, with any 'ORCID:' text removed, equals this
    @param orcid: a subfield whose value (without 'ORCID:') equals this
        is not reported
    @param inspire: a subfield whose raw value equals this is not reported

    @return: one line per matching subfield, each made of the record id,
        the author and the subfield value, or None when nothing matched
    @rtype: string or None
    """
    # Parameterized query (same calling convention as the other run_sql
    # users in this file) instead of pasting personid into the SQL text.
    rows = run_sql('select * from aidPERSONIDPAPERS '
                   'where personid=%s and flag>-2', (personid,))

    lines = []
    # The unpacking keeps the nine-column row shape; unused columns get an
    # underscore name so the personid parameter is no longer shadowed.
    for (_pid, table, bibref, bibrec, author,
         _match, _flag, _cul, _date) in rows:
        author_name = get_name_by_bibref((table, bibref))

        # Load and parse the record once and reuse the fields for both
        # passes below.
        record = AmendableRecord(get_bibrecord(bibrec))
        fields = list(record.iterfields(['{0}__%'.format(table)]))

        # First pass: find the author field; the last match wins.
        position = -1
        for key, value in fields:
            if key[0] in ('700__a', '100__a') and value == author_name:
                position = key[1]
        if position < 0:
            continue

        # Second pass: compare the identifier subfields of that field.
        wanted = tuple('{0}__{1}'.format(table, code) for code in 'aijkm')
        for key, value in fields:
            if key[1] != position or key[0] not in wanted:
                continue
            bare_value = value.replace('ORCID:', '')
            if bare_value == external_id and bare_value != orcid and \
                    value != inspire:
                lines.append("    " + " ".join([str(bibrec),
                                                author, value, '\n']))

    return ''.join(lines) or None
def show_papers(personid, external_id=None, orcid=None, inspire=None):
    """
    List the papers of a person whose author field carries a given ID.

    Every row of aidPERSONIDPAPERS attached to the person (flag > -2) is
    inspected.  In the corresponding record the author field whose name
    subfield (100__a / 700__a) equals the name stored for the bibref is
    located, and the a/i/j/k/m subfields of that field are compared
    against the given identifiers.

    @param personid: id of the person in aidPERSONIDPAPERS
    @param external_id: identifier to look for; a subfield matches when
        its value, with any 'ORCID:' text removed, equals this
    @param orcid: a subfield whose value (without 'ORCID:') equals this
        is not reported
    @param inspire: a subfield whose raw value equals this is not reported

    @return: one line per matching subfield, each made of the record id,
        the author and the subfield value, or None when nothing matched
    @rtype: string or None
    """
    # Parameterized query (same calling convention as the other run_sql
    # users in this file) instead of pasting personid into the SQL text.
    rows = run_sql('select * from aidPERSONIDPAPERS '
                   'where personid=%s and flag>-2', (personid,))

    lines = []
    # The unpacking keeps the nine-column row shape; unused columns get an
    # underscore name so the personid parameter is no longer shadowed.
    for (_pid, table, bibref, bibrec, author,
         _match, _flag, _cul, _date) in rows:
        author_name = get_name_by_bibref((table, bibref))

        # Load and parse the record once and reuse the fields for both
        # passes below.
        record = AmendableRecord(get_bibrecord(bibrec))
        fields = list(record.iterfields(['{0}__%'.format(table)]))

        # First pass: find the author field; the last match wins.
        position = -1
        for key, value in fields:
            if key[0] in ('700__a', '100__a') and value == author_name:
                position = key[1]
        if position < 0:
            continue

        # Second pass: compare the identifier subfields of that field.
        wanted = tuple('{0}__{1}'.format(table, code) for code in 'aijkm')
        for key, value in fields:
            if key[1] != position or key[0] not in wanted:
                continue
            bare_value = value.replace('ORCID:', '')
            if bare_value == external_id and bare_value != orcid and \
                    value != inspire:
                lines.append("    " + " ".join([str(bibrec),
                                                author, value, '\n']))

    return ''.join(lines) or None
def getInspireRecordMetadata(inspireID):
    '''For a given INSPIRE ID, collect the desired metadata fields
       and return them.
    '''

    fieldArray = {'0247_2': 'stdIDsource', '0247_a': 'stdID',
                  '245__a': 'title', '8564_u': 'files'}
    fieldValues = {}
    fieldKeys = fieldArray.keys()
    for fKey in fieldKeys:
        fieldValues[fKey] = get_fieldvalues(inspireID, fKey)
        print "fieldValues=", fKey, ":", fieldValues[fKey]

    # ThS suggested approach for dealing with the problem of two repeating
    # fields that correspond (say, a type in one field, and a value in another)
    record = AmendableRecord(get_bibrecord(inspireID))
    for _, val in record.iterfield('035__a', subfield_filter=('9', 'arXiv')):
        fieldValues['arxivID'] = val

    pdfList = []
    for z in fieldValues['8564_u']:
        if 'pdf' in z:
            pdfList.append(z)
    fieldValues['8564_u'] = pdfList

    return fieldValues
Exemple #4
0
def create_ticket(recid, bibcatalog_system, queue=CFG_REFEXTRACT_TICKET_QUEUE):
    """
    Submit a reference-curation ticket for a record.

    Nothing is submitted when either the ticketing system or the queue is
    missing, when the record has no 980 $a equal to "HEP", or when any
    037 $c category starts with "astro-ph".

    @param recid: id of the record the ticket is about
    @param bibcatalog_system: object providing ticket_submit()
    @param queue: name of the queue that receives the ticket
    """
    write_message("bibcatalog_system %s" % bibcatalog_system, verbose=1)
    write_message("queue %s" % queue, verbose=1)
    if not (bibcatalog_system and queue):
        return

    record = get_bibrecord(recid)

    # Only create tickets for HEP
    in_hep = False
    for collection_tag in record_get_field_instances(record, "980"):
        if "HEP" in field_get_subfield_values(collection_tag, "a"):
            in_hep = True
    if not in_hep:
        write_message("not in hep", verbose=1)
        return

    # The subject carries the first report number of every 037 field.
    subject_parts = ["Refs for #%s" % recid]
    for report_tag in record_get_field_instances(record, "037"):
        for category in field_get_subfield_values(report_tag, "c"):
            if category.startswith("astro-ph"):
                # We do not curate astro-ph
                write_message("astro-ph", verbose=1)
                return

        for report_number in field_get_subfield_values(report_tag, "a"):
            subject_parts.append(report_number)
            break

    bibcatalog_system.ticket_submit(
        subject=" ".join(subject_parts),
        queue=queue,
        text="%s/record/edit/#state=edit&recid=%s" % (CFG_SITE_SECURE_URL,
                                                      recid),
        recordid=recid)
def populate_cnums():
    """
    Populates table seqSTORE with the cnums present in CONFERENCE records
    """
    # First get all records from conference collection
    conf_records = perform_request_search(cc="Conferences", p="111__g:C*", rg=0)

    for recid in conf_records:
        cnum = record_get_field_value(get_bibrecord(recid), tag="111", ind1="", ind2="", code="g")
        if cnum:
            if not _cnum_exists(cnum):
                _insert_cnum(cnum)
                print "cnum %s from record %s inserted" % (cnum, recid)
    def _next_value(self, recid=None, xml_record=None, start_date=None):
        """
        Returns the next cnum for the given recid

        @param recid: id of the record where the cnum will be generated
        @type recid: int

        @param xml_record: record in xml format
        @type xml_record: string

        @param start_date: use given start date
        @type start_date: string

        @return: next cnum for the given recid. Format is Cyy-mm-dd.[.1n]
        @rtype: string

        @raises ConferenceNoStartDateError: No date information found in the
        given recid
        """
        bibrecord = None
        if recid is None and xml_record is not None:
            bibrecord = create_record(xml_record)[0]
        elif recid is not None:
            bibrecord = get_bibrecord(recid)

        if start_date is None and bibrecord is not None:
            start_date = record_get_field_value(bibrecord,
                                                tag="111",
                                                ind1="",
                                                ind2="",
                                                code="x")

        if not start_date:
            raise ConferenceNoStartDateError

        base_cnum = "C" + start_date[2:]

        record_cnums = self._get_record_cnums(base_cnum)
        if not record_cnums:
            new_cnum = base_cnum
        elif len(record_cnums) == 1:
            new_cnum = base_cnum + '.' + '1'
        else:
            # Get the max current revision, cnums are in format Cyy-mm-dd,
            # Cyy-mm-dd.1, Cyy-mm-dd.2
            highest_revision = max(
                [int(rev[0].split('.')[1]) for rev in record_cnums[1:]])
            new_cnum = base_cnum + '.' + str(highest_revision + 1)

        return new_cnum
Exemple #7
0
    def _next_value(self, recid=None, xml_record=None, start_date=None):
        """
        Returns the next cnum for the given recid

        @param recid: id of the record where the cnum will be generated
        @type recid: int

        @param xml_record: record in xml format
        @type xml_record: string

        @param start_date: use given start date
        @type start_date: string

        @return: next cnum for the given recid. Format is Cyy-mm-dd.[.1n]
        @rtype: string

        @raises ConferenceNoStartDateError: No date information found in the
        given recid
        """
        bibrecord = None
        if recid is None and xml_record is not None:
            bibrecord = create_record(xml_record)[0]
        elif recid is not None:
            bibrecord = get_bibrecord(recid)

        if start_date is None and bibrecord is not None:
            start_date = record_get_field_value(bibrecord,
                                                tag="111",
                                                ind1="",
                                                ind2="",
                                                code="x")

        if not start_date:
            raise ConferenceNoStartDateError

        base_cnum = "C" + start_date[2:]

        record_cnums = self._get_record_cnums(base_cnum)
        if not record_cnums:
            new_cnum = base_cnum
        elif len(record_cnums) == 1:
            new_cnum = base_cnum + '.' + '1'
        else:
            # Get the max current revision, cnums are in format Cyy-mm-dd,
            # Cyy-mm-dd.1, Cyy-mm-dd.2
            highest_revision = max([int(rev[0].split('.')[1]) for rev in record_cnums[1:]])
            new_cnum = base_cnum + '.' + str(highest_revision + 1)

        return new_cnum
Exemple #8
0
def _create_ticket(recid, bibcatalog_system, queue):
    """
    Submit a reference-curation ticket for a record.

    On an INSPIRE site no ticket is created for arXiv papers, for user
    submissions, for records outside the CORE collection, or for records
    created more than 30 * 4 days ago; otherwise the first report number
    of every 037 field is appended to the subject.

    @param recid: id of the record the ticket is about
    @param bibcatalog_system: object providing ticket_submit()
    @param queue: name of the queue that receives the ticket
    """
    subject_parts = ["Refs for #%s" % recid]

    if CFG_INSPIRE_SITE:
        record = get_bibrecord(recid)

        is_core = False
        for field_980 in record_get_field_instances(record, "980"):
            for name in field_get_subfield_values(field_980, 'a'):
                if name == 'arXiv':
                    # Do not create tickets for arxiv papers
                    # Tickets for arxiv papers are created in bibcatalog
                    write_message("arXiv paper", verbose=1)
                    return
                elif name == 'CORE':
                    is_core = True

        # Do not create tickets for user submissions
        for field_541 in record_get_field_instances(record, "541"):
            if "submission" in field_get_subfield_values(field_541, "c"):
                write_message("User submitted paper", verbose=1)
                return

        # Only create tickets for CORE papers
        if not is_core:
            write_message("not in core papers", verbose=1)
            return

        # Do not create tickets for old records
        rows = run_sql("""SELECT creation_date FROM bibrec
                                   WHERE id = %s""", [recid])
        created = rows[0][0]
        oldest_allowed = datetime.now() - timedelta(days=30 * 4)
        if created < oldest_allowed:
            return

        # Add the report numbers to the subject
        for field_037 in record_get_field_instances(record, "037"):
            for report_number in field_get_subfield_values(field_037, 'a'):
                subject_parts.append(report_number)
                break

    bibcatalog_system.ticket_submit(
        subject=" ".join(subject_parts),
        queue=queue,
        text='%s/record/edit/#state=edit&recid=%s' % (CFG_SITE_SECURE_URL,
                                                      recid),
        recordid=recid)
def _create_ticket(recid, bibcatalog_system, queue):
    """
    Submit a reference-curation ticket for a record.

    On an INSPIRE site no ticket is created for arXiv papers, for records
    outside the CORE collection, for records created more than 30 * 4
    days ago, or for records with a 037 $c category starting with
    'astro-ph'; otherwise the first report number of every 037 field is
    appended to the subject.

    @param recid: id of the record the ticket is about
    @param bibcatalog_system: object providing ticket_submit()
    @param queue: name of the queue that receives the ticket
    """
    subject = "Refs for #%s" % recid

    if CFG_INSPIRE_SITE:
        record = get_bibrecord(recid)

        in_core = False
        for collection_tag in record_get_field_instances(record, "980"):
            for collection in field_get_subfield_values(collection_tag, 'a'):
                if collection == 'CORE':
                    in_core = True
                if collection == 'arXiv':
                    # Do not create tickets for arxiv papers
                    # Tickets for arxiv papers are created in bibcatalog
                    write_message("arXiv paper", verbose=1)
                    return

        # Only create tickets for CORE papers.  The log message now names
        # the collection that is actually tested (it used to say "hep").
        if not in_core:
            write_message("not in core papers", verbose=1)
            return

        # Do not create tickets for old records
        creation_date = run_sql("""SELECT creation_date FROM bibrec
                                   WHERE id = %s""", [recid])[0][0]
        if creation_date < datetime.now() - timedelta(days=30*4):
            return

        # Add the report numbers to the subject
        for report_tag in record_get_field_instances(record, "037"):
            for category in field_get_subfield_values(report_tag, 'c'):
                if category.startswith('astro-ph'):
                    write_message("astro-ph", verbose=1)
                    # We do not curate astro-ph
                    return

            # Only the first report number of each 037 field is used.
            for report_number in field_get_subfield_values(report_tag, 'a'):
                subject += " " + report_number
                break

    text = '%s/record/edit/#state=edit&recid=%s' % (CFG_SITE_SECURE_URL,
                                                    recid)
    bibcatalog_system.ticket_submit(subject=subject,
                                    queue=queue,
                                    text=text,
                                    recordid=recid)
Exemple #10
0
def enumerate_records(records):
    """
    Iterate over record IDs, yielding each loadable record together with
    its position in the input (starting from 0) and its integer ID.
    Records that cannot be loaded are reported and skipped; their
    position is not reused.

    @param records: Array of record IDs
    @type records: iterable of int (or values accepted by int())

    @yield: tuple (count, recordId, AmendableRecord wrapping the record)
    """
    for index, raw_id in enumerate(records):
        record_id = int(raw_id)
        bibrecord = get_bibrecord(record_id)
        if bibrecord:
            yield index, record_id, AmendableRecord(bibrecord)
        else:
            write_message("Error: could not load record '%s'." % (raw_id,))
Exemple #11
0
def load_records_from_id(records):
    """
    Iterate over (record id, date) pairs, yielding each pair with the
    record id replaced by the loaded record structure.  Records that
    cannot be loaded are reported and skipped.

    @param records: iterable of (recid, date-string) pairs,
                    Ex: (1, 2012-12-12 12:12:12)
    @type records: iterable of tuple

    @yield: tuple of (record structure (dict), date-string)
    """
    for raw_id, timestamp in records:
        bibrecord = get_bibrecord(int(raw_id))
        if bibrecord:
            yield bibrecord, timestamp
        else:
            write_message("Error: could not load record %s" % (raw_id, ))
Exemple #12
0
def populate_cnums():
    """
    Populates table seqSTORE with the cnums present in CONFERENCE records
    """
    # First get all records from conference collection
    conf_records = perform_request_search(f="collection", p="CONFERENCES")

    for recid in conf_records:
        cnum = record_get_field_value(get_bibrecord(recid),
                                      tag="111",
                                      ind1="",
                                      ind2="",
                                      code="g")
        if cnum:
            if not _cnum_exists(cnum):
                _insert_cnum(cnum)
                print "cnum %s from record %s inserted" % (cnum, recid)
Exemple #13
0
def enumerate_records(records):
    """
    Iterate over record IDs, yielding each loadable record together with
    its position in the input (starting from 0) and its integer ID.
    Records that cannot be loaded are reported and skipped; their
    position is not reused.

    @param records: Array of record IDs
    @type records: iterable of int (or values accepted by int())

    @yield: tuple (count, recordId, AmendableRecord wrapping the record)
    """
    for index, raw_id in enumerate(records):
        record_id = int(raw_id)
        bibrecord = get_bibrecord(record_id)
        if bibrecord:
            yield index, record_id, AmendableRecord(bibrecord)
        else:
            write_message("Error: could not load record '%s'." % (raw_id, ))
def load_records_from_id(records):
    """
    Iterate over (record id, date) pairs, yielding each pair with the
    record id replaced by the loaded record structure.  Records that
    cannot be loaded are reported and skipped.

    @param records: iterable of (recid, date-string) pairs,
                    Ex: (1, 2012-12-12 12:12:12)
    @type records: iterable of tuple

    @yield: tuple of (record structure (dict), date-string)
    """
    for raw_id, timestamp in records:
        bibrecord = get_bibrecord(int(raw_id))
        if bibrecord:
            yield bibrecord, timestamp
        else:
            write_message("Error: could not load record %s" % (raw_id,))
Exemple #15
0
def create_ticket(recid, bibcatalog_system, queue=CFG_REFEXTRACT_TICKET_QUEUE):
    """
    Submit a reference-curation ticket for a record.

    Nothing is submitted when either the ticketing system or the queue is
    missing, when the record has no 980 $a equal to 'HEP', or when any
    037 $c category starts with 'astro-ph'.

    @param recid: id of the record the ticket is about
    @param bibcatalog_system: object providing ticket_submit()
    @param queue: name of the queue that receives the ticket
    """
    write_message('bibcatalog_system %s' % bibcatalog_system, verbose=1)
    write_message('queue %s' % queue, verbose=1)
    if not (bibcatalog_system and queue):
        return

    record = get_bibrecord(recid)

    # Only create tickets for HEP
    in_hep = False
    for collection_tag in record_get_field_instances(record, "980"):
        if 'HEP' in field_get_subfield_values(collection_tag, 'a'):
            in_hep = True
    if not in_hep:
        write_message("not in hep", verbose=1)
        return

    # The subject carries the first report number of every 037 field.
    subject_parts = ["Refs for #%s" % recid]
    for report_tag in record_get_field_instances(record, "037"):
        for category in field_get_subfield_values(report_tag, 'c'):
            if category.startswith('astro-ph'):
                # We do not curate astro-ph
                write_message("astro-ph", verbose=1)
                return

        for report_number in field_get_subfield_values(report_tag, 'a'):
            subject_parts.append(report_number)
            break

    bibcatalog_system.ticket_submit(
        subject=" ".join(subject_parts),
        queue=queue,
        text='%s/record/edit/#state=edit&recid=%s' % (CFG_SITE_SECURE_URL,
                                                      recid),
        recordid=recid)
    ind2 = marc[4].replace('_', ' ')
    sfcode = marc[5]
    to_split = fields_to_split(record, tag, ind1, ind2, sfcode)

    if not to_split:
        continue
#   work from the back to try to preserve order
    positions = to_split.keys()
    positions.sort(reverse=True)
    for global_pos in positions:
        (parts, rest_before, rest_after) = to_split[global_pos]
        message += " - split %s %s" % (tag, parts)
        record_delete_field(record, tag, ind1, ind2,
                            field_position_global=global_pos)
        parts.reverse()
        for subfield in parts:
            field = rest_before + [subfield, ] + rest_after
            record_add_field(record, tag, ind1, ind2, '', field,
                             field_position_global=global_pos)
    if message:
        record.set_amended(message)


if __name__ == '__main__':
    # Ad-hoc driver: run the check on every test record, with a minimal
    # rule attached to each record before it is checked.
    for test_recid in test_records:
        amendable = AmendableRecord(get_bibrecord(test_recid))
        amendable.rule = {}
        amendable.rule['name'] = 'melissa'
        amendable.rule['holdingpen'] = False
        check_record(amendable)
                                id[1][0], id[2], recid, inspire_id)
#                          record.warn("%s from HEPNames doesn't match id for author %s in record %s (%s)" % (id[1][0], id[2], record, inspire_id))
                    else:
                        print "email: %s, inspire-id: %s" % (id[2], id[1][0])
                        additions.append((id[0], 'i', id[1][0]))
                if id[1][1]:
                    if orcid_true:
                        if orcid == id[1][1]:
                            print "%s in %s already has an ORICD" % (id[2],
                                                                     recid)
                        else:
                            print "%s from HEPNames doesn't match id for author %s in record %s (%s)" % (
                                id[1][1], id[2], recid, orcid)
#                        record.warn("%s from HEPNames doesn't match id for author %s in record %s (%s)" % (id[1][1], id[2], recid, orcid))
                    else:
                        print "email: %s, orcid: %s" % (id[2], id[1][1])
                        additions.append((id[0], 'j', id[1][1]))
    print "additions: ", additions
    for addition in additions:
        print "Adding %s to tag %s at position %s in %s" % (
            addition[2], addition[0][0], addition[0][1], recid)


#          record_add_subfield_into(record, addition[0][0], addition[1], addition[2], field_position_local=addition[0][1])

if __name__ == '__main__':
    for r in test_records:
        print 'working on ', r
        record = AmendableRecord(get_bibrecord(r))
        check_record(record)
def perform_request_record(req, request_type, recid, uid, data):
    """Handle 'major' record related requests like fetching, submitting or
    deleting a record, cancel editing or preparing a record for merging.

    @param req: request object; only used to read the 'bibedit_tagformat'
        session parameter when a record is fetched
    @param request_type: one of 'newRecord', 'getRecord', 'submit',
        'revert', 'cancel', 'deleteRecord', 'deleteRecordCache' or
        'prepareRecordMerge'; any other value yields an empty response
    @type request_type: string
    @param recid: id of the record the request is about
    @param uid: id of the user; together with recid it identifies the
        cache file
    @param data: request parameters.  Keys read here, depending on the
        request type: 'newType', 'templateFilename', 'inReadOnlyMode',
        'deleteRecordCache', 'recordRevision', 'clonedRecord',
        'cacheMTime', 'force', 'revId'
    @type data: dict

    @return: response values; 'resultCode' is set on most paths, further
        keys depend on the request type
    @rtype: dict
    """
    response = {}

    if request_type == 'newRecord':
        # Create a new record.
        new_recid = reserve_record_id()
        new_type = data['newType']
        if new_type == 'empty':
            # Create a new empty record.
            # NOTE(review): the cache is created for recid, whereas the
            # two branches below use new_recid -- confirm this is intended.
            create_cache_file(recid, uid)
            response['resultCode'], response['newRecID'] = 6, new_recid

        elif new_type == 'template':
            # Create a new record from XML record template.
            template_filename = data['templateFilename']
            template = get_record_template(template_filename)
            if not template:
                response['resultCode'] = 108
            else:
                record = create_record(template)[0]
                if not record:
                    response['resultCode'] = 109
                else:
                    record_add_field(record, '001',
                                     controlfield_value=str(new_recid))
                    create_cache_file(new_recid, uid, record, True)
                    response['resultCode'], response['newRecID'] = \
                        7, new_recid

        elif new_type == 'clone':
            # Clone an existing record (from the users cache).
            existing_cache = cache_exists(recid, uid)
            if existing_cache:
                try:
                    record = get_cache_file_contents(recid, uid)[2]
                except Exception:
                    # if, for example, the cache format was wrong (outdated)
                    record = get_bibrecord(recid)
            else:
                # Cache missing. Fall back to using original version.
                record = get_bibrecord(recid)
            record_delete_field(record, '001')
            record_add_field(record, '001', controlfield_value=str(new_recid))
            create_cache_file(new_recid, uid, record, True)
            response['resultCode'], response['newRecID'] = 8, new_recid

    elif request_type == 'getRecord':
        # Fetch the record. Possible error situations:
        # - Non-existing record
        # - Deleted record
        # - Record locked by other user
        # - Record locked by queue
        # A cache file will be created if it does not exist.
        # If the cache is outdated (i.e., not based on the latest DB revision),
        # cacheOutdated will be set to True in the response.
        record_status = record_exists(recid)
        existing_cache = cache_exists(recid, uid)
        read_only_mode = data.get('inReadOnlyMode', False)

        if record_status == 0:
            response['resultCode'] = 102
        elif record_status == -1:
            response['resultCode'] = 103
        elif not read_only_mode and not existing_cache and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif not read_only_mode and existing_cache and \
                cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif not read_only_mode and record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            if data.get('deleteRecordCache'):
                delete_cache_file(recid, uid)
                existing_cache = False
                pending_changes = []
                disabled_hp_changes = {}
            if read_only_mode:
                if 'recordRevision' in data:
                    # A specific revision was requested.
                    record_revision_ts = data['recordRevision']
                    record_xml = get_marcxml_of_revision(recid,
                                                         record_revision_ts)
                    record = create_record(record_xml)[0]
                    record_revision = timestamp_to_revision(
                        record_revision_ts)
                    pending_changes = []
                    disabled_hp_changes = {}
                else:
                    # a normal cacheless retrieval of a record
                    record = get_bibrecord(recid)
                    record_revision = get_record_last_modification_date(recid)
                    pending_changes = []
                    disabled_hp_changes = {}
                cache_dirty = False
                mtime = 0
            elif not existing_cache:
                record_revision, record = create_cache_file(recid, uid)
                mtime = get_cache_mtime(recid, uid)
                pending_changes = []
                disabled_hp_changes = {}
                cache_dirty = False
            else:
                try:
                    cache_dirty, record_revision, record, pending_changes, \
                        disabled_hp_changes = \
                        get_cache_file_contents(recid, uid)
                    touch_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    if not latest_record_revision(recid, record_revision):
                        response['cacheOutdated'] = True
                except Exception:
                    # Unusable cache file: start over from a fresh cache.
                    record_revision, record = create_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    pending_changes = []
                    disabled_hp_changes = {}
                    cache_dirty = False

            if data['clonedRecord']:
                response['resultCode'] = 9
            else:
                response['resultCode'] = 3

            revision_author = get_record_revision_author(recid,
                                                         record_revision)
            last_revision_ts = revision_to_timestamp(
                get_record_last_modification_date(recid))
            revisions_history = get_record_revision_timestamps(recid)

            response['cacheDirty'], response['record'], response['cacheMTime'],\
                response['recordRevision'], response['revisionAuthor'], \
                response['lastRevision'], response['revisionsHistory'], \
                response['inReadOnlyMode'], response['pendingHpChanges'], \
                response['disabledHpChanges'] = cache_dirty, record, mtime, \
                revision_to_timestamp(record_revision), revision_author, \
                last_revision_ts, revisions_history, read_only_mode, \
                pending_changes, disabled_hp_changes
            # Set tag format from user's session settings.
            try:
                tagformat_settings = session_param_get(req, 'bibedit_tagformat')
                tagformat = tagformat_settings[recid]
            except KeyError:
                tagformat = CFG_BIBEDIT_TAG_FORMAT
            response['tagFormat'] = tagformat

    elif request_type == 'submit':
        # Submit the record. Possible error situations:
        # - Missing cache file
        # - Cache file modified in other editor
        # - Record locked by other user
        # - Record locked by queue
        # - Invalid XML characters
        # If the cache is outdated cacheOutdated will be set to True in the
        # response.
        if not cache_exists(recid, uid):
            response['resultCode'] = 106
        elif not get_cache_mtime(recid, uid) == data['cacheMTime']:
            response['resultCode'] = 107
        elif cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            try:
                record_revision, record, pending_changes, disabled_changes = \
                    get_cache_file_contents(recid, uid)[1:]
                # Round-trip through XML to validate the cached record.
                xml_record = print_rec(record)
                record, status_code, list_of_errors = create_record(xml_record)
                if status_code == 0:
                    response['resultCode'], response['errors'] = 110, \
                        list_of_errors
                elif not data['force'] and \
                        not latest_record_revision(recid, record_revision):
                    response['cacheOutdated'] = True
                else:
                    save_xml_record(recid, uid)
                    response['resultCode'] = 4
            except Exception:
                response['resultCode'] = \
                    CFG_BIBEDIT_AJAX_RESULT_CODES_REV['wrong_cache_file_format']

    elif request_type == 'revert':
        # Save the requested revision as the current record and drop the
        # user's cache, if any.
        revId = data['revId']
        job_date = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(revId).groups()
        revision_xml = get_marcxml_of_revision(recid, job_date)
        save_xml_record(recid, uid, revision_xml)
        if cache_exists(recid, uid):
            delete_cache_file(recid, uid)
        response['resultCode'] = 4

    elif request_type == 'cancel':
        # Cancel editing by deleting the cache file. Possible error situations:
        # - Cache file modified in other editor
        if cache_exists(recid, uid):
            if get_cache_mtime(recid, uid) == data['cacheMTime']:
                delete_cache_file(recid, uid)
                response['resultCode'] = 5
            else:
                response['resultCode'] = 107
        else:
            response['resultCode'] = 5

    elif request_type == 'deleteRecord':
        # Delete the record. Possible error situations:
        # - Record locked by other user
        # - Record locked by queue
        # As the user is requesting deletion we proceed even if the cache file
        # is missing and we don't check if the cache is outdated or has
        # been modified in another editor.
        existing_cache = cache_exists(recid, uid)
        if existing_cache and cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            cache_loaded = False
            if existing_cache:
                try:
                    record_revision, record, pending_changes, \
                        desactivated_hp_changes = \
                        get_cache_file_contents(recid, uid)[1:]
                    cache_loaded = True
                except Exception:
                    # Unusable cache file: rebuild it below.
                    cache_loaded = False
            if not cache_loaded:
                # create_cache_file() is unpacked as a (revision, record)
                # pair everywhere else in this function; unpacking four
                # values here would raise ValueError, so the change lists
                # are initialised explicitly, as in the 'getRecord' branch.
                record_revision, record = create_cache_file(recid, uid)
                pending_changes = []
                desactivated_hp_changes = {}
            # Deletion is expressed by tagging the record 980 $c DELETED.
            record_add_field(record, '980', ' ', ' ', '', [('c', 'DELETED')])
            update_cache_file_contents(recid, uid, record_revision, record,
                                       pending_changes,
                                       desactivated_hp_changes)
            save_xml_record(recid, uid)
            # we don't need any changes related to a deleted record
            delete_related_holdingpen_changes(recid)
            response['resultCode'] = 10

    elif request_type == 'deleteRecordCache':
        # Delete the cache file. Ignore the request if the cache has been
        # modified in another editor.
        if cache_exists(recid, uid) and get_cache_mtime(recid, uid) == \
                data['cacheMTime']:
            delete_cache_file(recid, uid)
        response['resultCode'] = 11

    elif request_type == 'prepareRecordMerge':
        # We want to merge the cache with the current DB version of the record,
        # so prepare an XML file from the file cache, to be used by BibMerge.
        # Possible error situations:
        # - Missing cache file
        # - Record locked by other user
        # - Record locked by queue
        # We don't check if cache is outdated (a likely scenario for this
        # request) or if it has been modified in another editor.
        if not cache_exists(recid, uid):
            response['resultCode'] = 106
        elif cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            save_xml_record(recid, uid, to_upload=False, to_merge=True)
            response['resultCode'] = 12

    return response
    def _next_value(self, recid=None, xml_record=None, bibrecord=None):
        """
        Returns the next texkey for the given recid

        The key is built as ``<author>:<year><chars>`` where <author> is
        the part of the author name before the first comma (spaces
        removed), <year> is the part of the date before the first "-" and
        <chars> comes from _texkey_random_chars().

        @param recid: id of the record where the texkey will be generated
        @type recid: int

        @param xml_record: record in xml format; only parsed when recid
        is None
        @type xml_record: string

        @param bibrecord: already parsed record structure; when given it
        is used instead of fetching the record for recid

        @return: next texkey for the given recid.
        @rtype: string

        @raises TexkeyNoAuthorError: No main author (100__a), collaboration
        (710__g) or corporate author (110__a) in the given recid
        @raises TexkeyNoYearError: No value in any of 269__c, 260__c,
        773__y or 502__d
        """
        if recid is None and xml_record is not None:
            bibrecord = create_record(xml_record)[0]
        elif bibrecord is None:
            bibrecord = get_bibrecord(recid)

        main_author = record_get_field_value(bibrecord,
                                             tag="100",
                                             ind1="",
                                             ind2="",
                                             code="a")

        if not main_author:
            # Try with collaboration name, dropping the literal word
            # "collaboration" and all whitespace
            main_author = record_get_field_value(bibrecord,
                                                 tag="710",
                                                 ind1="",
                                                 ind2="",
                                                 code="g")
            main_author = "".join([p for p in main_author.split()
                                   if p.lower() != "collaboration"])

        if not main_author:
            # Try with corporate author. This must read 110__a: 100__a was
            # already found to be empty above, so asking for it again could
            # never succeed.
            main_author = record_get_field_value(bibrecord,
                                                 tag="110",
                                                 ind1="",
                                                 ind2="",
                                                 code="a")
            if not main_author:
                raise TexkeyNoAuthorError

        # Remove utf-8 special characters
        main_author = unidecode(main_author.decode('utf-8'))
        # str.split() always returns at least one element, so no guard is
        # needed around the [0]
        texkey_first_part = main_author.split(',')[0].replace(" ", "")

        # First non-empty date wins, in this order of preference
        for tag, code in (("269", "c"), ("260", "c"),
                          ("773", "y"), ("502", "d")):
            year = record_get_field_value(bibrecord,
                                          tag=tag,
                                          ind1="",
                                          ind2="",
                                          code=code)
            if year:
                break
        else:
            raise TexkeyNoYearError

        texkey_second_part = year.split("-")[0]

        texkey_third_part = _texkey_random_chars(recid)

        texkey = texkey_first_part + ":" + texkey_second_part + texkey_third_part

        # NOTE: if every retry collides, the last candidate is returned even
        # though it already exists.
        tries = 0
        while self._value_exists(texkey) and tries < TEXKEY_MAXTRIES:
            # Key is already in the DB, generate a new one
            texkey_third_part = _texkey_random_chars(recid, use_random=True)
            texkey = texkey_first_part + ":" + texkey_second_part + texkey_third_part
            tries += 1

        return texkey
Exemple #20
0
    def _next_value(self, recid=None, xml_record=None, bibrecord=None):
        """
        Returns the next texkey for the given recid

        The texkey has the shape ``<author>:<year><chars>``: the author
        name up to the first comma with spaces removed, the date up to the
        first "-", and the characters produced by _texkey_random_chars().

        @param recid: id of the record where the texkey will be generated
        @type recid: int

        @param xml_record: record in xml format; only parsed when recid
        is None
        @type xml_record: string

        @param bibrecord: already parsed record structure; when given it
        is used instead of fetching the record for recid

        @return: next texkey for the given recid.
        @rtype: string

        @raises TexkeyNoAuthorError: No main author (100__a), collaboration
        (710__g) or corporate author (110__a) in the given recid
        @raises TexkeyNoYearError: No value in any of 269__c, 260__c,
        773__y or 502__d
        """
        if recid is None and xml_record is not None:
            bibrecord = create_record(xml_record)[0]
        elif bibrecord is None:
            bibrecord = get_bibrecord(recid)

        main_author = record_get_field_value(bibrecord,
                                             tag="100",
                                             ind1="",
                                             ind2="",
                                             code="a")

        if not main_author:
            # Try with collaboration name; the word "collaboration" itself
            # and all whitespace are dropped
            main_author = record_get_field_value(bibrecord,
                                                 tag="710",
                                                 ind1="",
                                                 ind2="",
                                                 code="g")
            main_author = "".join([
                p for p in main_author.split() if p.lower() != "collaboration"
            ])

        if not main_author:
            # Try with corporate author (110__a). Re-reading 100__a here
            # would just return the empty value already seen above.
            main_author = record_get_field_value(bibrecord,
                                                 tag="110",
                                                 ind1="",
                                                 ind2="",
                                                 code="a")
            if not main_author:
                raise TexkeyNoAuthorError

        # Remove utf-8 special characters
        main_author = unidecode(main_author.decode('utf-8'))
        # split() never returns an empty list, so indexing [0] is safe
        texkey_first_part = main_author.split(',')[0].replace(" ", "")

        # Date fields in order of preference; stop at the first one that
        # holds a value
        year_fields = (("269", "c"), ("260", "c"), ("773", "y"), ("502", "d"))
        for tag, code in year_fields:
            year = record_get_field_value(bibrecord,
                                          tag=tag,
                                          ind1="",
                                          ind2="",
                                          code=code)
            if year:
                break
        else:
            raise TexkeyNoYearError

        texkey_second_part = year.split("-")[0]

        texkey_third_part = _texkey_random_chars(recid)

        texkey = texkey_first_part + ":" + texkey_second_part + texkey_third_part

        # NOTE: once TEXKEY_MAXTRIES retries are used up, the last candidate
        # is returned even if it is still present in the DB.
        tries = 0
        while self._value_exists(texkey) and tries < TEXKEY_MAXTRIES:
            # Key is already in the DB, generate a new one
            texkey_third_part = _texkey_random_chars(recid, use_random=True)
            texkey = texkey_first_part + ":" + texkey_second_part + texkey_third_part
            tries += 1

        return texkey
def Update_Approval_DB(parameters, curdir, form, user_info=None):
    """
    This function updates the approval database when a document has
    just been approved or rejected. It uses the [categformatDAM]
    parameter to compute the category of the document.  Must be called
    after the Get_Report_Number function.

    For any action other than APP, APS, APM and APO the function records
    an approval request: a row with status 'waiting' is inserted into
    sbmAPPROVAL, or the existing row is set back to 'waiting'.

    For APP, APS, APM and APO the referee's decision is read from a file
    in curdir. If it is exactly "approve" the rows for the reference
    number are marked 'approved'; anything else (including a missing
    decision file) marks them 'rejected'. A rejection under the APS
    action additionally replaces the record's 980 field by 980__a:REJBLOG
    and queues bibupload, bibindex and webcoll tasks.

    The function reads the module globals rn and sysno.

    Parameters:

       * categformatDAM: It contains the regular expression which
                         allows the retrieval of the category from the
                         reference number.
                         Eg: if [categformatDAM]="TEST-<CATEG>-.*" and
                         the reference is "TEST-CATEG1-2001-001" then
                         the category will be recognized as "CATEG1".
                         If it contains "<FILE:name>", the category is
                         instead read from the file "name" in curdir.

       * decision_file: (optional) name of the file in curdir holding
                         the decision. When missing, empty or "NULL",
                         a file called 'decision' is used.

    @return: always an empty string
    """
    global rn, sysno
    doctype = form['doctype']
    act = form['act']
    categformat = parameters['categformatDAM']

    ## Get the name of the decision file:
    try:
        decision_filename = parameters['decision_file']
    except KeyError:
        decision_filename = ""

    pid = os.getpid()
    now = time.time()
    access = "%i_%s" % (now,pid)
    if act not in ["APP", "APS", "APM", "APO"]:
        # retrieve category
        if re.search("<FILE:",categformat):
            filename = categformat.replace("<FILE:","")
            filename = filename.replace(">","")
            category_path = "%s/%s" % (curdir,filename)
            if os.path.exists(category_path):
                # 'with' closes the file even if the read fails
                with open(category_path) as fp:
                    category = fp.read()
            else:
                category=""
            category = category.replace("\n","")
        else:
            categformat = categformat.replace("<CATEG>","([^-]*)")
            m_categ_search = re.match(categformat, rn)
            if m_categ_search is not None and m_categ_search.groups():
                ## Found a match for the category of this document. Get it:
                category = m_categ_search.group(1)
            else:
                ## No match, or the pattern has no group: this document
                ## has no category.
                category = ""

        if category == "":
            category = "unknown"
        sth = run_sql("SELECT status,dFirstReq,dLastReq,dAction FROM sbmAPPROVAL WHERE  doctype=%s and categ=%s and rn=%s", (doctype,category,rn,))

        if len(sth) == 0:
            run_sql("INSERT INTO sbmAPPROVAL (doctype, categ, rn, status, dFirstReq, dLastReq, dAction, access) VALUES (%s,%s,%s,'waiting',NOW(),NOW(),'',%s)", (doctype,category,rn,access,))
        else:
            run_sql("UPDATE sbmAPPROVAL SET dLastReq=NOW(), status='waiting' WHERE  doctype=%s and categ=%s and rn=%s", (doctype,category,rn,))
    else:
        ## Since this is one of the approval actions (APP, APS, APM, APO),
        ## this call of the function must be on behalf of the referee - in
        ## order to approve or reject an item.
        ## We need to get the decision from the decision file:
        if decision_filename in (None, "", "NULL"):
            ## We don't have a name for the decision file.
            ## For backward compatibility reasons, try to read the decision from
            ## a file called 'decision' in curdir:
            decision_filename = "decision"
        ## Both the named and the legacy file are read the same way: the
        ## content is stripped so that a trailing newline does not turn an
        ## "approve" into a rejection, and a missing/unreadable file yields
        ## an empty decision.
        try:
            with open("%s/%s" % (curdir, decision_filename), "r") as fh_decision:
                decision = fh_decision.read().strip()
        except IOError:
            ## Oops, unable to open the decision file.
            decision = ""

        from invenio.bibrecord import record_delete_field, record_add_field, record_xml_output
        from invenio.bibedit_utils import get_bibrecord
        from invenio.bibtask import task_low_level_submission
        record = get_bibrecord(sysno)
        ## Either approve or reject the item, based upon the contents
        ## of 'decision':
        if decision == "approve":
            run_sql("UPDATE sbmAPPROVAL SET dAction=NOW(),status='approved' WHERE  rn=%s", (rn,))
        else:
            run_sql("UPDATE sbmAPPROVAL SET dAction=NOW(),status='rejected' WHERE  rn=%s", (rn,))
            if act == "APS":
                record_delete_field(record, "980")
                record_add_field(record, '980', ' ', ' ', '', [('a', 'REJBLOG')])
                fd, name = tempfile.mkstemp(suffix='.xml', dir=CFG_TMPDIR)
                try:
                    os.write(fd, """<collection>\n""")
                    os.write(fd, record_xml_output(record))
                    os.write(fd, """</collection>\n""")
                finally:
                    ## Never leak the descriptor, even if a write fails
                    os.close(fd)
                task_low_level_submission('bibupload', 'admin', '-c', name)
                task_low_level_submission('bibindex', 'admin')
                task_low_level_submission('webcoll', 'admin', '-c', "Provisional Blogs")
                task_low_level_submission('webcoll', 'admin', '-c', "Blogs")
    return ""
Exemple #22
0
def perform_request_record(req, request_type, recid, uid, data, ln=CFG_SITE_LANG):
    """Handle 'major' record related requests like fetching, submitting or
    deleting a record, cancel editing or preparing a record for merging.

    @param req: request object; only used to read the "bibedit_tagformat"
        session parameter for "getRecord"
    @param request_type: one of "newRecord", "getRecord", "submit",
        "revert", "cancel", "deleteRecord", "deleteRecordCache" or
        "prepareRecordMerge"; any other value yields an empty response
    @param recid: id of the record the request is about
    @param uid: id of the user issuing the request
    @param data: dictionary with the request arguments (e.g. "newType",
        "cacheMTime", "force", "revId", depending on the request type)
    @param ln: language, passed on when building the item details URL
    @return: response dictionary; in most branches it carries a numeric
        "resultCode", plus request specific entries
    """
    response = {}

    if request_type == "newRecord":
        # Create a new record.
        new_recid = reserve_record_id()
        new_type = data["newType"]
        if new_type == "empty":
            # Create a new empty record.
            # NOTE(review): the cache is created for recid, not new_recid,
            # unlike the "template" and "clone" branches - confirm intended.
            create_cache_file(recid, uid)
            response["resultCode"], response["newRecID"] = 6, new_recid

        elif new_type == "template":
            # Create a new record from XML record template.
            template_filename = data["templateFilename"]
            template = get_record_template(template_filename)
            if not template:
                response["resultCode"] = 108
            else:
                record = create_record(template)[0]
                if not record:
                    response["resultCode"] = 109
                else:
                    record_add_field(record, "001", controlfield_value=str(new_recid))
                    create_cache_file(new_recid, uid, record, True)
                    response["resultCode"], response["newRecID"] = 7, new_recid

        elif new_type == "clone":
            # Clone an existing record (from the users cache).
            existing_cache = cache_exists(recid, uid)
            if existing_cache:
                try:
                    record = get_cache_file_contents(recid, uid)[2]
                except Exception:
                    # if, for example, the cache format was wrong (outdated)
                    record = get_bibrecord(recid)
            else:
                # Cache missing. Fall back to using original version.
                record = get_bibrecord(recid)
            record_delete_field(record, "001")
            record_add_field(record, "001", controlfield_value=str(new_recid))
            create_cache_file(new_recid, uid, record, True)
            response["resultCode"], response["newRecID"] = 8, new_recid
    elif request_type == "getRecord":
        # Fetch the record. Possible error situations:
        # - Non-existing record
        # - Deleted record
        # - Record locked by other user
        # - Record locked by queue
        # A cache file will be created if it does not exist.
        # If the cache is outdated (i.e., not based on the latest DB revision),
        # cacheOutdated will be set to True in the response.
        record_status = record_exists(recid)
        existing_cache = cache_exists(recid, uid)
        read_only_mode = data.get("inReadOnlyMode", False)

        if record_status == 0:
            response["resultCode"] = 102
        elif record_status == -1:
            response["resultCode"] = 103
        elif not read_only_mode and not existing_cache and record_locked_by_other_user(recid, uid):
            response["resultCode"] = 104
        elif (
            not read_only_mode
            and existing_cache
            and cache_expired(recid, uid)
            and record_locked_by_other_user(recid, uid)
        ):
            response["resultCode"] = 104
        elif not read_only_mode and record_locked_by_queue(recid):
            response["resultCode"] = 105
        else:
            if data.get("deleteRecordCache"):
                # Start from scratch; the branches below then either read
                # the record without a cache or create a fresh cache file.
                delete_cache_file(recid, uid)
                existing_cache = False
            if read_only_mode:
                # Read-only viewing never touches the cache file.
                if "recordRevision" in data:
                    # a specific (possibly old) revision was requested
                    record_revision_ts = data["recordRevision"]
                    record_xml = get_marcxml_of_revision(recid, record_revision_ts)
                    record = create_record(record_xml)[0]
                    record_revision = timestamp_to_revision(record_revision_ts)
                else:
                    # a normal cacheless retrieval of a record
                    record = get_bibrecord(recid)
                    record_revision = get_record_last_modification_date(recid)
                pending_changes = []
                disabled_hp_changes = {}
                cache_dirty = False
                mtime = 0
                undo_list = []
                redo_list = []
            elif not existing_cache:
                record_revision, record = create_cache_file(recid, uid)
                mtime = get_cache_mtime(recid, uid)
                pending_changes = []
                disabled_hp_changes = {}
                undo_list = []
                redo_list = []
                cache_dirty = False
            else:
                # TODO: This try except should be replaced with something nicer,
                #      like an argument indicating if a new cache file is to
                #      be created
                try:
                    (
                        cache_dirty,
                        record_revision,
                        record,
                        pending_changes,
                        disabled_hp_changes,
                        undo_list,
                        redo_list,
                    ) = get_cache_file_contents(recid, uid)
                    touch_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    if not latest_record_revision(recid, record_revision) and get_record_revisions(recid) != ():
                        # This should prevent from using old cache in case of
                        # viewing old version. If there are no revisions,
                        # it means we should skip this step because this
                        # is a new record
                        response["cacheOutdated"] = True

                except Exception:
                    # The cache could not be used (e.g. outdated format):
                    # replace it by a freshly created one.
                    record_revision, record = create_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    pending_changes = []
                    disabled_hp_changes = {}
                    cache_dirty = False
                    undo_list = []
                    redo_list = []
            if data["clonedRecord"]:
                response["resultCode"] = 9
            else:
                response["resultCode"] = 3
            revision_author = get_record_revision_author(recid, record_revision)
            last_revision_ts = revision_to_timestamp(get_record_last_modification_date(recid))
            revisions_history = get_record_revision_timestamps(recid)
            number_of_physical_copies = get_number_copies(recid)
            bibcirc_details_URL = create_item_details_url(recid, ln)
            can_have_copies = can_record_have_physical_copies(recid)

            response.update(
                {
                    "cacheDirty": cache_dirty,
                    "record": record,
                    "cacheMTime": mtime,
                    "recordRevision": revision_to_timestamp(record_revision),
                    "revisionAuthor": revision_author,
                    "lastRevision": last_revision_ts,
                    "revisionsHistory": revisions_history,
                    "inReadOnlyMode": read_only_mode,
                    "pendingHpChanges": pending_changes,
                    "disabledHpChanges": disabled_hp_changes,
                    "undoList": undo_list,
                    "redoList": redo_list,
                }
            )
            response["numberOfCopies"] = number_of_physical_copies
            response["bibCirculationUrl"] = bibcirc_details_URL
            response["canRecordHavePhysicalCopies"] = can_have_copies
            # Set tag format from user's session settings.
            try:
                tagformat_settings = session_param_get(req, "bibedit_tagformat")
                tagformat = tagformat_settings[recid]
            except KeyError:
                tagformat = CFG_BIBEDIT_TAG_FORMAT
            response["tagFormat"] = tagformat

    elif request_type == "submit":
        # Submit the record. Possible error situations:
        # - Missing cache file
        # - Cache file modified in other editor
        # - Record locked by other user
        # - Record locked by queue
        # - Invalid XML characters
        # If the cache is outdated cacheOutdated will be set to True in the
        # response.
        if not cache_exists(recid, uid):
            response["resultCode"] = 106
        elif not get_cache_mtime(recid, uid) == data["cacheMTime"]:
            response["resultCode"] = 107
        elif cache_expired(recid, uid) and record_locked_by_other_user(recid, uid):
            response["resultCode"] = 104
        elif record_locked_by_queue(recid):
            response["resultCode"] = 105
        else:
            try:
                tmp_result = get_cache_file_contents(recid, uid)
                record_revision = tmp_result[1]
                record = tmp_result[2]

                # Round-trip through XML to validate the cached record
                # before it is saved.
                xml_record = print_rec(record)
                record, status_code, list_of_errors = create_record(xml_record)
                if status_code == 0:
                    response["resultCode"], response["errors"] = 110, list_of_errors
                elif not data["force"] and not latest_record_revision(recid, record_revision):
                    response["cacheOutdated"] = True
                else:
                    save_xml_record(recid, uid)
                    response["resultCode"] = 4
            except Exception:
                response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV["error_wrong_cache_file_format"]
    elif request_type == "revert":
        # Save the MARCXML of the requested revision as the record and drop
        # the user's cache file.
        revId = data["revId"]
        job_date = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(revId).groups()
        revision_xml = get_marcxml_of_revision(recid, job_date)
        save_xml_record(recid, uid, revision_xml)
        if cache_exists(recid, uid):
            delete_cache_file(recid, uid)
        response["resultCode"] = 4

    elif request_type == "cancel":
        # Cancel editing by deleting the cache file. Possible error situations:
        # - Cache file modified in other editor
        if cache_exists(recid, uid):
            if get_cache_mtime(recid, uid) == data["cacheMTime"]:
                delete_cache_file(recid, uid)
                response["resultCode"] = 5
            else:
                response["resultCode"] = 107
        else:
            response["resultCode"] = 5

    elif request_type == "deleteRecord":
        # Delete the record. Possible error situations:
        # - Physical copies of the record exist
        # - Record locked by other user
        # - Record locked by queue
        # As the user is requesting deletion we proceed even if the cache file
        # is missing and we don't check if the cache is outdated or has
        # been modified in another editor.
        existing_cache = cache_exists(recid, uid)
        pending_changes = []

        if has_copies(recid):
            response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV["error_physical_copies_exist"]
        elif existing_cache and cache_expired(recid, uid) and record_locked_by_other_user(recid, uid):
            response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV["error_rec_locked_by_user"]
        elif record_locked_by_queue(recid):
            response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV["error_rec_locked_by_queue"]
        else:
            # create_cache_file() is unpacked into (record_revision, record)
            # by the "getRecord" branch, so only its first two items are
            # taken here; a freshly created cache carries no pending or
            # deactivated holding pen changes.
            if not existing_cache:
                record_revision, record = create_cache_file(recid, uid)[:2]
                deactivated_hp_changes = {}
            else:
                try:
                    (
                        record_revision,
                        record,
                        pending_changes,
                        deactivated_hp_changes,
                        undo_list,
                        redo_list,
                    ) = get_cache_file_contents(recid, uid)[1:]
                except Exception:
                    # Cache unreadable (e.g. outdated format): start over
                    # from a fresh cache file.
                    record_revision, record = create_cache_file(recid, uid)[:2]
                    pending_changes = []
                    deactivated_hp_changes = {}
            record_add_field(record, "980", " ", " ", "", [("c", "DELETED")])
            # The undo/redo history is dropped for a deleted record.
            undo_list = []
            redo_list = []
            update_cache_file_contents(
                recid, uid, record_revision, record, pending_changes, deactivated_hp_changes, undo_list, redo_list
            )
            save_xml_record(recid, uid)
            delete_related_holdingpen_changes(recid)  # we don't need any changes
            # related to a deleted record
            response["resultCode"] = 10

    elif request_type == "deleteRecordCache":
        # Delete the cache file. Ignore the request if the cache has been
        # modified in another editor.
        if cache_exists(recid, uid) and get_cache_mtime(recid, uid) == data["cacheMTime"]:
            delete_cache_file(recid, uid)
        response["resultCode"] = 11

    elif request_type == "prepareRecordMerge":
        # We want to merge the cache with the current DB version of the record,
        # so prepare an XML file from the file cache, to be used by BibMerge.
        # Possible error situations:
        # - Missing cache file
        # - Record locked by other user
        # - Record locked by queue
        # We don't check if cache is outdated (a likely scenario for this
        # request) or if it has been modified in another editor.
        if not cache_exists(recid, uid):
            response["resultCode"] = 106
        elif cache_expired(recid, uid) and record_locked_by_other_user(recid, uid):
            response["resultCode"] = 104
        elif record_locked_by_queue(recid):
            response["resultCode"] = 105
        else:
            save_xml_record(recid, uid, to_upload=False, to_merge=True)
            response["resultCode"] = 12

    return response
Exemple #23
0
    def _next_value(self, recid=None, xml_record=None, bibrecord=None):
        """
        Build the next texkey for a record.

        The texkey is formatted as "<name>:<year><chars>": <name> is the
        sanitized part of the author name before the first comma, <year>
        is what _get_year() extracts from the first usable date field and
        <chars> comes from _texkey_random_chars().

        @param recid: id of the record where the texkey will be generated
        @type recid: int

        @param xml_record: record in xml format; only parsed when recid
        is None
        @type xml_record: string

        @param bibrecord: already parsed record structure; when given it
        is used instead of fetching the record for recid

        @return: next texkey for the given recid.
        @rtype: string

        @raises TexkeyNoAuthorError: No usable main author (100__a),
        collaboration (710__g) or corporate author (110__a), and the
        record is not in the Proceedings collection (980__a)
        @raises TexkeyNoYearError: No year in any of 269__c, 260__c,
        773__y or 502__d
        """
        if recid is None and xml_record is not None:
            bibrecord = create_record(xml_record)[0]
        elif bibrecord is None:
            bibrecord = get_bibrecord(recid)

        def first_value(tag, code):
            # Value of subfield `code` of `tag` with blank indicators
            return record_get_field_value(bibrecord,
                                          tag=tag,
                                          ind1="",
                                          ind2="",
                                          code=code)

        author = first_value("100", "a")

        if not author:
            # Fall back to the collaboration name, without the word
            # "collaboration" and without whitespace
            words = first_value("710", "g").split()
            author = "".join(
                [word for word in words if word.lower() != "collaboration"])

        if not author:
            # Fall back to the corporate author
            author = first_value("110", "a")

        if not author:
            # Last resort: records of the Proceedings collection get a
            # fixed name
            collections = [
                name.lower()
                for name in record_get_field_values(bibrecord, "980", code="a")
            ]
            if "proceedings" not in collections:
                raise TexkeyNoAuthorError
            author = "Proceedings"

        # Remove utf-8 special characters
        author = unidecode(author.decode('utf-8'))
        try:
            name_part = author.split(',')[0]
        except KeyError:
            raise TexkeyNoAuthorError

        # sanitize for texkey use, require at least one letter
        name_part = re.sub(r'[^-A-Za-z0-9.:/^_;&*<>?|!$+]', '', name_part)
        if not (name_part and re.search(r'[A-Za-z]', name_part)):
            raise TexkeyNoAuthorError

        # Date fields in order of preference; the first one yielding a
        # year wins
        for tag, code in (("269", "c"), ("260", "c"),
                          ("773", "y"), ("502", "d")):
            year = _get_year(first_value(tag, code))
            if year:
                break
        else:
            raise TexkeyNoYearError

        year_part = year if year else ''

        def compose(suffix):
            # Assemble the full key around the given third part
            return "%s:%s%s" % (name_part, year_part, suffix)

        texkey = compose(_texkey_random_chars(recid))

        attempts = 0
        while self._value_exists(texkey) and attempts < TEXKEY_MAXTRIES:
            # Key is already in the DB, generate a new one
            texkey = compose(_texkey_random_chars(recid, use_random=True))
            attempts += 1

        return texkey
# The two parallel citation logs kept for every paper, and the categories
# tracked inside each of them.
_CITATION_LOG_KINDS = ('Including self-cites', 'Excluding self-cites')
_CITATION_CATEGORIES = ('Total', 'HEP-EX', 'HEP-TH', 'Q1', 'Q2', 'Q3', 'Q4')
# Key under which the per-date average is stored next to the per-date counts.
_AVERAGE_KEY = 'Average'


def _new_paper_entry():
    """Return an empty PAPERS entry: no people and empty citation logs."""
    return {
        'People': {'Student person IDs': [], 'Author person IDs': []},
        'Citation logs': dict(
            (kind, dict((category, {}) for category in _CITATION_CATEGORIES))
            for kind in _CITATION_LOG_KINDS),
    }


def _student_person_ids(author_pids):
    """Return person IDs of the students of the given author person IDs.

    For each author the canonical names (BAIs) are looked up; a BAI that
    matches exactly one HepNames record is followed to its INSPIRE ID, the
    HepNames records pointing at that ID through 701__i are fetched, and
    the BAI of each of those records is converted back to a person ID.
    """
    canonical_names = []
    for pid in author_pids:
        for row in get_canonical_name_of_author(pid):
            canonical_names.extend(row)

    student_pids = []
    for bai in canonical_names:
        bai_search = perform_request_search(p='035__a:%s' % bai,
                                            cc='HepNames')
        # Ambiguous or missing HepNames matches are ignored.
        if len(bai_search) != 1:
            continue
        for person in bai_search:
            record = get_bibrecord(person)
            inspireid = record_get_field_values(
                record, '035', code='a',
                filter_subfield_code='9', filter_subfield_value='INSPIRE')
            if not inspireid:
                continue
            student_search = perform_request_search(
                p='701__i:%s' % inspireid[0], cc='HepNames')
            for student in student_search:
                srecord = get_bibrecord(student)
                sbai = record_get_field_values(
                    srecord, '035', code='a',
                    filter_subfield_code='9', filter_subfield_value='BAI')
                if not sbai:
                    continue
                # NOTE(review): sbai is the whole list of BAI values, not a
                # single name; kept as in the original call -- confirm that
                # get_author_by_canonical_name accepts it.
                try:
                    student_pids.append(
                        int(get_author_by_canonical_name(sbai)[0][0]))
                except IndexError:
                    # Best effort: no person found for this BAI, skip it.
                    continue
    return student_pids


def _count_citation(logs, category, date, is_self_cite):
    """Add one citation dated `date` to `category` in the citation logs.

    The citation is always counted in the 'Including self-cites' log and is
    counted in the 'Excluding self-cites' log only when it is not a
    self-cite.
    """
    including = logs['Including self-cites'][category]
    including[date] = including.get(date, 0) + 1
    if not is_self_cite:
        excluding = logs['Excluding self-cites'][category]
        excluding[date] = excluding.get(date, 0) + 1


def main():
    """Build per-paper citation statistics and dump them to a file.

    For every HEP record matching the module-level `search_term`, the
    global PAPERS dict receives an entry holding the person IDs of the
    paper's authors and of their students, plus per-date citation counts
    (overall, per field code and per journal group), both including and
    excluding self-cites.  A citation is treated as a self-cite when any
    author of the paper, or any student of such an author, is among the
    authors of the citing paper.  Afterwards the average count per date is
    stored under the 'Average' key of every counter and PAPERS is written
    to 'bubble_SUSY.dict' with `dump`.

    `search_term` is not assigned in this function, so it has to exist at
    module level before main() is called.
    """
    search = perform_request_search(p=search_term, cc='HEP')
    for r in search:
        print("Working on %s" % r)
        PAPERS[r] = _new_paper_entry()
        people = PAPERS[r]['People']
        logs = PAPERS[r]['Citation logs']

        # Person IDs of the paper's authors and of those authors' students.
        people['Author person IDs'] = list(
            get_personid_signature_association_for_paper(r).values())
        people['Student person IDs'] = _student_person_ids(
            people['Author person IDs'])
        related_pids = (people['Author person IDs'] +
                        people['Student person IDs'])

        # Every published record citing this paper.
        cite_search = perform_request_search(
            p='refersto:recid:%i collection:published ' % r, cc='HEP')
        for c in cite_search:
            citing_pids = list(
                get_personid_signature_association_for_paper(c).values())
            is_self_cite = any(pid in citing_pids for pid in related_pids)
            date = get_date(c)

            _count_citation(logs, 'Total', date, is_self_cite)

            # Field of the citing paper (the log keys are HEP-EX / HEP-TH).
            fieldcode = get_fieldcode(c)
            if fieldcode:
                _count_citation(logs, fieldcode, date, is_self_cite)

            # NOTE(review): the journal group is looked up for the cited
            # paper `r`, not the citing paper `c` -- confirm this is the
            # intended record.
            journal_group = get_journal(r)
            if journal_group:
                _count_citation(logs, journal_group, date, is_self_cite)

    # Average number of citations per date bucket for every counter.
    for key in PAPERS:
        logs = PAPERS[key]['Citation logs']
        for kind in _CITATION_LOG_KINDS:
            for category in _CITATION_CATEGORIES:
                counter = logs[kind][category]
                # Leave out an average stored by an earlier pass so that it
                # is not folded into the new one.
                counts = [count for date, count in counter.items()
                          if date != _AVERAGE_KEY]
                total = sum(counts)
                average = 0
                if total > 0:
                    # float() keeps the fraction under Python 2 division.
                    average = float(total) / len(counts)
                counter[_AVERAGE_KEY] = average

    with open('bubble_SUSY.dict', 'wb') as dict_out:
        dump(PAPERS, dict_out)