def show_papers(personid, external_id=None, orcid=None, inspire=None):
    """Return a report of claimed papers for ``personid`` whose author-ID
    subfields carry ``external_id`` but disagree with the known identifiers.

    For every paper of the person (flag > -2), the author's own field
    instance is located by matching the name from the bibref; its id-bearing
    subfields (a/i/j/k/m) are then compared against ``external_id``,
    ``orcid`` and ``inspire``.

    @param personid: aidPERSONIDPAPERS person id
    @param external_id: id value to look for (compared with 'ORCID:' prefix
        stripped)
    @param orcid: known ORCID of the person (mismatch is reported)
    @param inspire: known INSPIRE id of the person (mismatch is reported)
    @return: newline-joined report string, or None when nothing matched
    """
    # Parameterized query instead of string concatenation: avoids SQL
    # injection and quoting bugs if personid is ever non-numeric.
    result = run_sql(
        "select * from aidPERSONIDPAPERS where personid=%s and flag>-2",
        (personid, ))
    hep_records = ''
    for _personid, table, bibref, bibrec, author, _match, _flag, _cul, \
            _date in result:
        position = -1
        author_name = get_name_by_bibref((table, bibref))
        # Load and wrap the record once per paper; the original rebuilt
        # AmendableRecord(get_bibrecord(...)) for every iterfields() pass.
        record = AmendableRecord(get_bibrecord(bibrec))
        tag_filter = ['{0}__%'.format(table)]
        # First pass: find the field position of this author's instance.
        for key, value in record.iterfields(tag_filter):
            if key[0] in ('700__a', '100__a') and value == author_name:
                position = key[1]
        if position >= 0:
            id_subfields = tuple('{0}__{1}'.format(table, code)
                                 for code in ('a', 'i', 'j', 'k', 'm'))
            # Second pass: inspect the id subfields of that instance.
            for key, value in record.iterfields(tag_filter):
                if key[1] == position and key[0] in id_subfields:
                    stripped = value.replace('ORCID:', '')
                    # NOTE(review): the inspire comparison uses the raw
                    # (unstripped) value, as in the original -- confirm
                    # this asymmetry is intended.
                    if stripped == external_id and stripped != orcid and \
                            value != inspire:
                        hep_records += " " + " ".join(
                            [str(bibrec), author, value, '\n'])
    if hep_records:
        return hep_records
    return None
def getInspireRecordMetadata(inspireID): '''For a given INSPIRE ID, collect the desired metadata fields and return them. ''' fieldArray = {'0247_2': 'stdIDsource', '0247_a': 'stdID', '245__a': 'title', '8564_u': 'files'} fieldValues = {} fieldKeys = fieldArray.keys() for fKey in fieldKeys: fieldValues[fKey] = get_fieldvalues(inspireID, fKey) print "fieldValues=", fKey, ":", fieldValues[fKey] # ThS suggested approach for dealing with the problem of two repeating # fields that correspond (say, a type in one field, and a value in another) record = AmendableRecord(get_bibrecord(inspireID)) for _, val in record.iterfield('035__a', subfield_filter=('9', 'arXiv')): fieldValues['arxivID'] = val pdfList = [] for z in fieldValues['8564_u']: if 'pdf' in z: pdfList.append(z) fieldValues['8564_u'] = pdfList return fieldValues
def create_ticket(recid, bibcatalog_system, queue=CFG_REFEXTRACT_TICKET_QUEUE):
    """Open a references ticket for ``recid`` in ``queue``.

    Tickets are only created for records in the HEP collection, and never
    for astro-ph material (which is not curated).
    """
    write_message("bibcatalog_system %s" % bibcatalog_system, verbose=1)
    write_message("queue %s" % queue, verbose=1)
    if not (bibcatalog_system and queue):
        return
    subject = "Refs for #%s" % recid
    record = get_bibrecord(recid)
    # Collect every 980__a collection name; only HEP records get a ticket.
    collections = [name
                   for tag in record_get_field_instances(record, "980")
                   for name in field_get_subfield_values(tag, "a")]
    if "HEP" not in collections:
        write_message("not in hep", verbose=1)
        return
    for report_tag in record_get_field_instances(record, "037"):
        for category in field_get_subfield_values(report_tag, "c"):
            if category.startswith("astro-ph"):
                # We do not curate astro-ph.
                write_message("astro-ph", verbose=1)
                return
        # Append the report numbers of the first 037 field to the subject.
        for report_number in field_get_subfield_values(report_tag, "a"):
            subject += " " + report_number
        break
    text = "%s/record/edit/#state=edit&recid=%s" % (CFG_SITE_SECURE_URL,
                                                    recid)
    bibcatalog_system.ticket_submit(subject=subject, queue=queue, text=text,
                                    recordid=recid)
def populate_cnums(): """ Populates table seqSTORE with the cnums present in CONFERENCE records """ # First get all records from conference collection conf_records = perform_request_search(cc="Conferences", p="111__g:C*", rg=0) for recid in conf_records: cnum = record_get_field_value(get_bibrecord(recid), tag="111", ind1="", ind2="", code="g") if cnum: if not _cnum_exists(cnum): _insert_cnum(cnum) print "cnum %s from record %s inserted" % (cnum, recid)
def _next_value(self, recid=None, xml_record=None, start_date=None):
    """
    Return the next cnum for the given record.

    @param recid: id of the record where the cnum will be generated
    @type recid: int

    @param xml_record: record in xml format
    @type xml_record: string

    @param start_date: use given start date
    @type start_date: string

    @return: next cnum for the given recid. Format is Cyy-mm-dd[.n]
    @rtype: string

    @raises ConferenceNoStartDateError: no date information found in the
        given record
    """
    # Load the record from the supplied XML, or from the database.
    bibrecord = None
    if recid is None and xml_record is not None:
        bibrecord = create_record(xml_record)[0]
    elif recid is not None:
        bibrecord = get_bibrecord(recid)

    # Fall back to the conference start date stored in 111__x.
    if start_date is None and bibrecord is not None:
        start_date = record_get_field_value(bibrecord, tag="111", ind1="",
                                            ind2="", code="x")
    if not start_date:
        raise ConferenceNoStartDateError

    base_cnum = "C" + start_date[2:]
    existing = self._get_record_cnums(base_cnum)
    if not existing:
        return base_cnum
    if len(existing) == 1:
        return base_cnum + '.' + '1'
    # Several cnums already exist: Cyy-mm-dd, Cyy-mm-dd.1, Cyy-mm-dd.2 ...
    # Skip the bare base cnum (first entry) and bump the highest suffix.
    # NOTE(review): assumes every entry after the first contains a '.'
    # suffix -- confirm against _get_record_cnums ordering.
    suffixes = [int(entry[0].split('.')[1]) for entry in existing[1:]]
    return base_cnum + '.' + str(max(suffixes) + 1)
def _next_value(self, recid=None, xml_record=None, start_date=None):
    """
    Returns the next cnum for the given recid

    @param recid: id of the record where the cnum will be generated
    @type recid: int

    @param xml_record: record in xml format
    @type xml_record: string

    @param start_date: use given start date
    @type start_date: string

    @return: next cnum for the given recid. Format is Cyy-mm-dd.[.1n]
    @rtype: string

    @raises ConferenceNoStartDateError: No date information found in the
    given recid
    """
    # Load the record either from the supplied XML or from the database.
    bibrecord = None
    if recid is None and xml_record is not None:
        bibrecord = create_record(xml_record)[0]
    elif recid is not None:
        bibrecord = get_bibrecord(recid)

    # Fall back to the conference start date stored in 111__x.
    if start_date is None and bibrecord is not None:
        start_date = record_get_field_value(bibrecord, tag="111", ind1="",
                                            ind2="", code="x")
    if not start_date:
        raise ConferenceNoStartDateError

    base_cnum = "C" + start_date[2:]
    record_cnums = self._get_record_cnums(base_cnum)
    if not record_cnums:
        new_cnum = base_cnum
    elif len(record_cnums) == 1:
        new_cnum = base_cnum + '.' + '1'
    else:
        # Get the max current revision, cnums are in format Cyy-mm-dd,
        # Cyy-mm-dd.1, Cyy-mm-dd.2
        # NOTE(review): assumes the first entry is the bare base cnum and
        # every later entry contains a '.' suffix (otherwise split('.')[1]
        # raises IndexError) -- confirm against _get_record_cnums.
        highest_revision = max([int(rev[0].split('.')[1])
                                for rev in record_cnums[1:]])
        new_cnum = base_cnum + '.' + str(highest_revision + 1)
    return new_cnum
def _create_ticket(recid, bibcatalog_system, queue):
    """Submit a "Refs" curation ticket for ``recid`` to ``queue``.

    On an INSPIRE site, tickets are suppressed for arXiv papers, user
    submissions (541__c == "submission"), non-CORE records, and records
    older than roughly four months.
    """
    subject = "Refs for #%s" % recid
    if CFG_INSPIRE_SITE:
        # Add report number in the subject
        report_number = ""
        record = get_bibrecord(recid)
        in_core = False
        for collection_tag in record_get_field_instances(record, "980"):
            for collection in field_get_subfield_values(collection_tag, 'a'):
                if collection == 'CORE':
                    in_core = True
                if collection == 'arXiv':
                    # Do not create tickets for arxiv papers
                    # Tickets for arxiv papers are created in bibcatelog
                    write_message("arXiv paper", verbose=1)
                    return
        # Do not create tickets for user submissions
        for source_field in record_get_field_instances(record, "541"):
            for source in field_get_subfield_values(source_field, "c"):
                if source == "submission":
                    write_message("User submitted paper", verbose=1)
                    return
        # Only create tickets for CORE papers
        if not in_core:
            write_message("not in core papers", verbose=1)
            return
        # Do not create tickets for old records (older than ~4 months).
        creation_date = run_sql(
            """SELECT creation_date FROM bibrec WHERE id = %s""",
            [recid])[0][0]
        if creation_date < datetime.now() - timedelta(days=30 * 4):
            return
        # Append the report numbers of the first 037 field to the subject.
        for report_tag in record_get_field_instances(record, "037"):
            for report_number in field_get_subfield_values(report_tag, 'a'):
                subject += " " + report_number
            break
    # NOTE(review): indentation reconstructed from flattened source -- the
    # ticket is submitted regardless of CFG_INSPIRE_SITE; confirm.
    text = '%s/record/edit/#state=edit&recid=%s' % (CFG_SITE_SECURE_URL,
                                                    recid)
    bibcatalog_system.ticket_submit(subject=subject, queue=queue,
                                    text=text, recordid=recid)
def _create_ticket(recid, bibcatalog_system, queue):
    """Submit a "Refs" curation ticket for ``recid`` to ``queue``.

    On an INSPIRE site, tickets are suppressed for arXiv papers, non-CORE
    records, astro-ph material and records older than roughly four months.
    """
    subject = "Refs for #%s" % recid
    if CFG_INSPIRE_SITE:
        # Add report number in the subject
        report_number = ""
        record = get_bibrecord(recid)
        in_core = False
        for collection_tag in record_get_field_instances(record, "980"):
            for collection in field_get_subfield_values(collection_tag, 'a'):
                if collection == 'CORE':
                    in_core = True
                if collection == 'arXiv':
                    # Do not create tickets for arxiv papers
                    # Tickets for arxiv papers are created in bibcatelog
                    write_message("arXiv paper", verbose=1)
                    return
        # Only create tickets for HEP
        if not in_core:
            write_message("not in hep", verbose=1)
            return
        # Do not create tickets for old records (older than ~4 months).
        creation_date = run_sql("""SELECT creation_date FROM bibrec
                                   WHERE id = %s""", [recid])[0][0]
        if creation_date < datetime.now() - timedelta(days=30*4):
            return
        for report_tag in record_get_field_instances(record, "037"):
            for category in field_get_subfield_values(report_tag, 'c'):
                if category.startswith('astro-ph'):
                    write_message("astro-ph", verbose=1)
                    # We do not curate astro-ph
                    return
            # Append the report numbers of the first 037 field only.
            for report_number in field_get_subfield_values(report_tag, 'a'):
                subject += " " + report_number
            break
    # NOTE(review): indentation reconstructed from flattened source -- the
    # ticket is submitted regardless of CFG_INSPIRE_SITE; confirm.
    text = '%s/record/edit/#state=edit&recid=%s' % (CFG_SITE_SECURE_URL,
                                                    recid)
    bibcatalog_system.ticket_submit(subject=subject, queue=queue,
                                    text=text, recordid=recid)
def enumerate_records(records):
    """Yield (count, recid, record) triples for the given record IDs.

    The count starts at 0 and follows the input position, so IDs that
    fail to load still consume an index; they are reported and skipped.

    @param records: Array of record IDs
    @yield: tuple (count, recordId, AmendableRecord)
    """
    for idx, recid in enumerate(records):
        loaded = get_bibrecord(int(recid))
        if loaded:
            yield idx, int(recid), AmendableRecord(loaded)
        else:
            write_message("Error: could not load record '%s'." % (recid,))
def load_records_from_id(records):
    """Yield (record, date) pairs, replacing each recid with its record.

    IDs that cannot be loaded are reported and skipped.

    @param records: iterable of (recid, date-string) tuples,
        e.g. (1, "2012-12-12 12:12:12")
    @yield: tuple of (record structure (dict), date-string)
    """
    for recid, date in records:
        bibrec = get_bibrecord(int(recid))
        if bibrec:
            yield bibrec, date
        else:
            write_message("Error: could not load record %s" % (recid,))
def populate_cnums(): """ Populates table seqSTORE with the cnums present in CONFERENCE records """ # First get all records from conference collection conf_records = perform_request_search(f="collection", p="CONFERENCES") for recid in conf_records: cnum = record_get_field_value(get_bibrecord(recid), tag="111", ind1="", ind2="", code="g") if cnum: if not _cnum_exists(cnum): _insert_cnum(cnum) print "cnum %s from record %s inserted" % (cnum, recid)
def enumerate_records(records):
    """
    Given an array of record IDs this function will yield a triplet of
    the count (starting from 0), the record ID and the record object.

    IDs that fail to load are reported and skipped, but still consume an
    index (the count follows the input position).

    @param record: Array of record IDs
    @type record: int

    @yield: tuple (count, recordId, record structure (dict))
    """
    for i, recid in enumerate(records):
        record = get_bibrecord(int(recid))
        if not record:
            write_message("Error: could not load record '%s'." % (recid, ))
            continue
        yield i, int(recid), AmendableRecord(record)
def load_records_from_id(records):
    """
    Given a record tuple of record id and last updated/created date,
    this function will yield a tuple with the record id replaced with a
    record structure iterativly.

    IDs that fail to load are reported and skipped.

    @param record: tuple of (recid, date-string)
        Ex: (1, 2012-12-12 12:12:12)
    @type record: tuple

    @yield: tuple of (record structure (dict), date-string)
    """
    for recid, date in records:
        record = get_bibrecord(int(recid))
        if not record:
            write_message("Error: could not load record %s" % (recid,))
            continue
        yield record, date
def create_ticket(recid, bibcatalog_system, queue=CFG_REFEXTRACT_TICKET_QUEUE):
    """Open a references ticket for ``recid`` in ``queue``.

    Tickets are only created for HEP-collection records, and never for
    astro-ph material (which is not curated).
    """
    write_message('bibcatalog_system %s' % bibcatalog_system, verbose=1)
    write_message('queue %s' % queue, verbose=1)
    if bibcatalog_system and queue:
        subject = "Refs for #%s" % recid

        # Add report number in the subject
        report_number = ""
        record = get_bibrecord(recid)

        in_hep = False
        for collection_tag in record_get_field_instances(record, "980"):
            for collection in field_get_subfield_values(collection_tag, 'a'):
                if collection == 'HEP':
                    in_hep = True

        # Only create tickets for HEP
        if not in_hep:
            write_message("not in hep", verbose=1)
            return

        for report_tag in record_get_field_instances(record, "037"):
            for category in field_get_subfield_values(report_tag, 'c'):
                if category.startswith('astro-ph'):
                    write_message("astro-ph", verbose=1)
                    # We do not curate astro-ph
                    return
            # Only the first 037 field contributes report numbers.
            for report_number in field_get_subfield_values(report_tag, 'a'):
                subject += " " + report_number
            break

        text = '%s/record/edit/#state=edit&recid=%s' % (CFG_SITE_SECURE_URL, \
                                                        recid)
        bibcatalog_system.ticket_submit(subject=subject,
                                        queue=queue,
                                        text=text,
                                        recordid=recid)
ind2 = marc[4].replace('_', ' ') sfcode = marc[5] to_split = fields_to_split(record, tag, ind1, ind2, sfcode) if not to_split: continue # work from the back to try to preserve order positions = to_split.keys() positions.sort(reverse=True) for global_pos in positions: (parts, rest_before, rest_after) = to_split[global_pos] message += " - split %s %s" % (tag, parts) record_delete_field(record, tag, ind1, ind2, field_position_global=global_pos) parts.reverse() for subfield in parts: field = rest_before + [subfield, ] + rest_after record_add_field(record, tag, ind1, ind2, '', field, field_position_global=global_pos) if message: record.set_amended(message) if __name__ == '__main__': for record in test_records: record = AmendableRecord(get_bibrecord(record)) record.rule = {} record.rule['name'] = 'melissa' record.rule['holdingpen'] = False check_record(record)
id[1][0], id[2], recid, inspire_id) # record.warn("%s from HEPNames doesn't match id for author %s in record %s (%s)" % (id[1][0], id[2], record, inspire_id)) else: print "email: %s, inspire-id: %s" % (id[2], id[1][0]) additions.append((id[0], 'i', id[1][0])) if id[1][1]: if orcid_true: if orcid == id[1][1]: print "%s in %s already has an ORICD" % (id[2], recid) else: print "%s from HEPNames doesn't match id for author %s in record %s (%s)" % ( id[1][1], id[2], recid, orcid) # record.warn("%s from HEPNames doesn't match id for author %s in record %s (%s)" % (id[1][1], id[2], recid, orcid)) else: print "email: %s, orcid: %s" % (id[2], id[1][1]) additions.append((id[0], 'j', id[1][1])) print "additions: ", additions for addition in additions: print "Adding %s to tag %s at position %s in %s" % ( addition[2], addition[0][0], addition[0][1], recid) # record_add_subfield_into(record, addition[0][0], addition[1], addition[2], field_position_local=addition[0][1]) if __name__ == '__main__': for r in test_records: print 'working on ', r record = AmendableRecord(get_bibrecord(r)) check_record(record)
def perform_request_record(req, request_type, recid, uid, data):
    """Handle 'major' record related requests like fetching, submitting or
    deleting a record, cancel editing or preparing a record for merging.

    @param req: the web request object (used for session access)
    @param request_type: one of 'newRecord', 'getRecord', 'submit',
        'revert', 'cancel', 'deleteRecord', 'deleteRecordCache',
        'prepareRecordMerge'
    @param recid: id of the record the request concerns
    @param uid: id of the requesting user (cache files are per user)
    @param data: request parameters (dict)
    @return: response dict; 'resultCode' encodes success/failure
    """
    response = {}

    if request_type == 'newRecord':
        # Create a new record.
        new_recid = reserve_record_id()
        new_type = data['newType']
        if new_type == 'empty':
            # Create a new empty record.
            create_cache_file(recid, uid)
            response['resultCode'], response['newRecID'] = 6, new_recid

        elif new_type == 'template':
            # Create a new record from XML record template.
            template_filename = data['templateFilename']
            template = get_record_template(template_filename)
            if not template:
                response['resultCode'] = 108
            else:
                record = create_record(template)[0]
                if not record:
                    response['resultCode'] = 109
                else:
                    record_add_field(record, '001',
                                     controlfield_value=str(new_recid))
                    create_cache_file(new_recid, uid, record, True)
                    response['resultCode'], response['newRecID'] = \
                        7, new_recid

        elif new_type == 'clone':
            # Clone an existing record (from the users cache).
            existing_cache = cache_exists(recid, uid)
            if existing_cache:
                try:
                    record = get_cache_file_contents(recid, uid)[2]
                except:
                    # if, for example, the cache format was wrong (outdated)
                    record = get_bibrecord(recid)
            else:
                # Cache missing. Fall back to using original version.
                record = get_bibrecord(recid)
            # The clone gets the freshly reserved record id.
            record_delete_field(record, '001')
            record_add_field(record, '001',
                             controlfield_value=str(new_recid))
            create_cache_file(new_recid, uid, record, True)
            response['resultCode'], response['newRecID'] = 8, new_recid

    elif request_type == 'getRecord':
        # Fetch the record. Possible error situations:
        # - Non-existing record
        # - Deleted record
        # - Record locked by other user
        # - Record locked by queue
        # A cache file will be created if it does not exist.
        # If the cache is outdated (i.e., not based on the latest DB
        # revision), cacheOutdated will be set to True in the response.
        record_status = record_exists(recid)
        existing_cache = cache_exists(recid, uid)
        read_only_mode = False
        if data.has_key("inReadOnlyMode"):
            read_only_mode = data['inReadOnlyMode']

        if record_status == 0:
            response['resultCode'] = 102
        elif record_status == -1:
            response['resultCode'] = 103
        elif not read_only_mode and not existing_cache and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif not read_only_mode and existing_cache and \
                cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif not read_only_mode and record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            if data.get('deleteRecordCache'):
                delete_cache_file(recid, uid)
                existing_cache = False
                pending_changes = []
                disabled_hp_changes = {}
            if read_only_mode:
                if data.has_key('recordRevision'):
                    # Read-only view of a specific past revision.
                    record_revision_ts = data['recordRevision']
                    record_xml = get_marcxml_of_revision(recid,
                                                         record_revision_ts)
                    record = create_record(record_xml)[0]
                    record_revision = timestamp_to_revision(
                        record_revision_ts)
                    pending_changes = []
                    disabled_hp_changes = {}
                else:
                    # a normal cacheless retrieval of a record
                    record = get_bibrecord(recid)
                    record_revision = get_record_last_modification_date(recid)
                    pending_changes = []
                    disabled_hp_changes = {}
                cache_dirty = False
                mtime = 0
            elif not existing_cache:
                record_revision, record = create_cache_file(recid, uid)
                mtime = get_cache_mtime(recid, uid)
                pending_changes = []
                disabled_hp_changes = {}
                cache_dirty = False
            else:
                # Existing cache: try to load it; on any failure fall back
                # to a fresh cache file.
                try:
                    cache_dirty, record_revision, record, pending_changes, \
                        disabled_hp_changes = \
                        get_cache_file_contents(recid, uid)
                    touch_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    if not latest_record_revision(recid, record_revision):
                        response['cacheOutdated'] = True
                except:
                    record_revision, record = create_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    pending_changes = []
                    disabled_hp_changes = {}
                    cache_dirty = False
            if data['clonedRecord']:
                response['resultCode'] = 9
            else:
                response['resultCode'] = 3
            revision_author = get_record_revision_author(recid,
                                                         record_revision)
            last_revision_ts = revision_to_timestamp(
                get_record_last_modification_date(recid))
            revisions_history = get_record_revision_timestamps(recid)
            response['cacheDirty'], response['record'], \
                response['cacheMTime'], response['recordRevision'], \
                response['revisionAuthor'], response['lastRevision'], \
                response['revisionsHistory'], response['inReadOnlyMode'], \
                response['pendingHpChanges'], \
                response['disabledHpChanges'] = cache_dirty, record, mtime, \
                revision_to_timestamp(record_revision), revision_author, \
                last_revision_ts, revisions_history, read_only_mode, \
                pending_changes, disabled_hp_changes
            # Set tag format from user's session settings.
            try:
                tagformat_settings = session_param_get(req,
                                                       'bibedit_tagformat')
                tagformat = tagformat_settings[recid]
            except KeyError:
                tagformat = CFG_BIBEDIT_TAG_FORMAT
            response['tagFormat'] = tagformat

    elif request_type == 'submit':
        # Submit the record. Possible error situations:
        # - Missing cache file
        # - Cache file modified in other editor
        # - Record locked by other user
        # - Record locked by queue
        # - Invalid XML characters
        # If the cache is outdated cacheOutdated will be set to True in the
        # response.
        if not cache_exists(recid, uid):
            response['resultCode'] = 106
        elif not get_cache_mtime(recid, uid) == data['cacheMTime']:
            response['resultCode'] = 107
        elif cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            try:
                record_revision, record, pending_changes, \
                    disabled_changes = get_cache_file_contents(recid,
                                                               uid)[1:]
                # Round-trip through XML to validate the record structure.
                xml_record = print_rec(record)
                record, status_code, list_of_errors = \
                    create_record(xml_record)
                if status_code == 0:
                    response['resultCode'], response['errors'] = 110, \
                        list_of_errors
                elif not data['force'] and \
                        not latest_record_revision(recid, record_revision):
                    response['cacheOutdated'] = True
                else:
                    save_xml_record(recid, uid)
                    response['resultCode'] = 4
            except:
                response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                    'wrong_cache_file_format']

    elif request_type == 'revert':
        # Restore a past revision as the current record content.
        revId = data['revId']
        job_date = "%s-%s-%s %s:%s:%s" % \
            re_revdate_split.search(revId).groups()
        revision_xml = get_marcxml_of_revision(recid, job_date)
        save_xml_record(recid, uid, revision_xml)
        if (cache_exists(recid, uid)):
            delete_cache_file(recid, uid)
        response['resultCode'] = 4

    elif request_type == 'cancel':
        # Cancel editing by deleting the cache file. Possible error
        # situations:
        # - Cache file modified in other editor
        if cache_exists(recid, uid):
            if get_cache_mtime(recid, uid) == data['cacheMTime']:
                delete_cache_file(recid, uid)
                response['resultCode'] = 5
            else:
                response['resultCode'] = 107
        else:
            response['resultCode'] = 5

    elif request_type == 'deleteRecord':
        # Submit the record. Possible error situations:
        # - Record locked by other user
        # - Record locked by queue
        # As the user is requesting deletion we proceed even if the cache
        # file is missing and we don't check if the cache is outdated or
        # has been modified in another editor.
        existing_cache = cache_exists(recid, uid)
        pending_changes = []
        if existing_cache and cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            # NOTE(review): create_cache_file is unpacked into 4 values
            # here but into 2 in the 'getRecord' branch above -- confirm
            # its return arity.
            if not existing_cache:
                record_revision, record, pending_changes, \
                    desactivated_hp_changes = create_cache_file(recid, uid)
            else:
                try:
                    record_revision, record, pending_changes, \
                        desactivated_hp_changes = get_cache_file_contents(
                            recid, uid)[1:]
                except:
                    record_revision, record, pending_changes, \
                        desactivated_hp_changes = create_cache_file(recid,
                                                                    uid)
            # Mark the record as deleted via a 980__c DELETED field.
            record_add_field(record, '980', ' ', ' ', '',
                             [('c', 'DELETED')])
            update_cache_file_contents(recid, uid, record_revision, record,
                                       pending_changes,
                                       desactivated_hp_changes)
            save_xml_record(recid, uid)
            # we don't need any changes related to a deleted record
            delete_related_holdingpen_changes(recid)
            response['resultCode'] = 10

    elif request_type == 'deleteRecordCache':
        # Delete the cache file. Ignore the request if the cache has been
        # modified in another editor.
        if cache_exists(recid, uid) and get_cache_mtime(recid, uid) == \
                data['cacheMTime']:
            delete_cache_file(recid, uid)
        response['resultCode'] = 11

    elif request_type == 'prepareRecordMerge':
        # We want to merge the cache with the current DB version of the
        # record, so prepare an XML file from the file cache, to be used
        # by BibMerge. Possible error situations:
        # - Missing cache file
        # - Record locked by other user
        # - Record locked by queue
        # We don't check if cache is outdated (a likely scenario for this
        # request) or if it has been modified in another editor.
        if not cache_exists(recid, uid):
            response['resultCode'] = 106
        elif cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            save_xml_record(recid, uid, to_upload=False, to_merge=True)
            response['resultCode'] = 12

    return response
def _next_value(self, recid=None, xml_record=None, bibrecord=None):
    """
    Returns the next texkey for the given recid

    @param recid: id of the record where the texkey will be generated
    @type recid: int

    @param xml_record: record in xml format
    @type xml_record: string

    @return: next texkey for the given recid.
    @rtype: string

    @raises TexkeyNoAuthorError: No main author (100__a) or
    collaboration (710__g) in the given recid
    @raises TexkeyNoYearError: No year found in 269__c, 260__c, 773__y
    or 502__d of the given recid
    """
    # Load the record from XML, or from the database, unless one was
    # passed in directly.
    if recid is None and xml_record is not None:
        bibrecord = create_record(xml_record)[0]
    elif bibrecord is None:
        bibrecord = get_bibrecord(recid)

    main_author = record_get_field_value(bibrecord, tag="100", ind1="",
                                         ind2="", code="a")
    if not main_author:
        # Try with collaboration name
        main_author = record_get_field_value(bibrecord, tag="710", ind1="",
                                             ind2="", code="g")
        main_author = "".join([p for p in main_author.split()
                               if p.lower() != "collaboration"])
        if not main_author:
            # Try with corporate author
            # NOTE(review): this re-reads 100__a, which is already known
            # to be empty at this point; a corporate-author tag (e.g.
            # 110__a) may have been intended -- confirm before changing.
            main_author = record_get_field_value(bibrecord, tag="100",
                                                 ind1="", ind2="",
                                                 code="a")
            if not main_author:
                raise TexkeyNoAuthorError

    # Remove utf-8 special characters
    main_author = unidecode(main_author.decode('utf-8'))
    # str.split always returns at least one element, so indexing [0] is
    # safe; the original wrapped this in a dead "except KeyError" that
    # could never fire.
    texkey_first_part = main_author.split(',')[0].replace(" ", "")

    # Pick the first available year: preprint date, imprint date,
    # journal year, thesis date.
    year = record_get_field_value(bibrecord, tag="269", ind1="", ind2="",
                                  code="c")
    if not year:
        year = record_get_field_value(bibrecord, tag="260", ind1="",
                                      ind2="", code="c")
    if not year:
        year = record_get_field_value(bibrecord, tag="773", ind1="",
                                      ind2="", code="y")
    if not year:
        year = record_get_field_value(bibrecord, tag="502", ind1="",
                                      ind2="", code="d")
    if not year:
        raise TexkeyNoYearError
    # Same reasoning as above: split("-")[0] cannot raise.
    texkey_second_part = year.split("-")[0]

    texkey_third_part = _texkey_random_chars(recid)
    texkey = texkey_first_part + ":" + texkey_second_part + \
        texkey_third_part

    tries = 0
    while self._value_exists(texkey) and tries < TEXKEY_MAXTRIES:
        # Key is already in the DB, generate a new one
        texkey_third_part = _texkey_random_chars(recid, use_random=True)
        texkey = texkey_first_part + ":" + texkey_second_part + \
            texkey_third_part
        tries += 1
    return texkey
def _next_value(self, recid=None, xml_record=None, bibrecord=None):
    """
    Returns the next texkey for the given recid

    @param recid: id of the record where the texkey will be generated
    @type recid: int

    @param xml_record: record in xml format
    @type xml_record: string

    @return: next texkey for the given recid.
    @rtype: string

    @raises TexkeyNoAuthorError: No main author (100__a) or
    collaboration (710__g) in the given recid
    @raises TexkeyNoYearError: No year found in 269__c, 260__c, 773__y
    or 502__d of the given recid
    """
    # Load the record from XML, or from the database, unless one was
    # passed in directly.
    if recid is None and xml_record is not None:
        bibrecord = create_record(xml_record)[0]
    elif bibrecord is None:
        bibrecord = get_bibrecord(recid)

    main_author = record_get_field_value(bibrecord, tag="100", ind1="",
                                         ind2="", code="a")
    if not main_author:
        # Try with collaboration name
        main_author = record_get_field_value(bibrecord, tag="710", ind1="",
                                             ind2="", code="g")
        main_author = "".join([
            p for p in main_author.split() if p.lower() != "collaboration"
        ])
        if not main_author:
            # Try with corporate author
            # NOTE(review): this re-reads 100__a, which is already known
            # to be empty here -- a corporate-author tag (e.g. 110__a)
            # may have been intended; confirm before changing.
            main_author = record_get_field_value(bibrecord, tag="100",
                                                 ind1="", ind2="",
                                                 code="a")
            if not main_author:
                raise TexkeyNoAuthorError

    # Remove utf-8 special characters
    main_author = unidecode(main_author.decode('utf-8'))
    # NOTE(review): str.split(...)[0] cannot raise KeyError, so this
    # except clause is dead code.
    try:
        texkey_first_part = main_author.split(',')[0].replace(" ", "")
    except KeyError:
        texkey_first_part = ""

    # Pick the first available year: preprint date, imprint date,
    # journal year, thesis date.
    year = record_get_field_value(bibrecord, tag="269", ind1="", ind2="",
                                  code="c")
    if not year:
        year = record_get_field_value(bibrecord, tag="260", ind1="",
                                      ind2="", code="c")
    if not year:
        year = record_get_field_value(bibrecord, tag="773", ind1="",
                                      ind2="", code="y")
    if not year:
        year = record_get_field_value(bibrecord, tag="502", ind1="",
                                      ind2="", code="d")
    if not year:
        raise TexkeyNoYearError
    # NOTE(review): same dead except clause as above.
    try:
        texkey_second_part = year.split("-")[0]
    except KeyError:
        texkey_second_part = ""

    texkey_third_part = _texkey_random_chars(recid)
    texkey = texkey_first_part + ":" + texkey_second_part + \
        texkey_third_part

    tries = 0
    while self._value_exists(texkey) and tries < TEXKEY_MAXTRIES:
        # Key is already in the DB, generate a new one
        texkey_third_part = _texkey_random_chars(recid, use_random=True)
        texkey = texkey_first_part + ":" + texkey_second_part + \
            texkey_third_part
        tries += 1
    return texkey
def Update_Approval_DB(parameters, curdir, form, user_info=None):
    """
    This function updates the approval database when a document has just
    been approved or rejected. It uses the [categformatDAM] parameter to
    compute the category of the document. Must be called after the
    Get_Report_Number function.

    Parameters:

       * categformatDAM: It contains the regular expression which allows
         the retrieval of the category from the reference number. Eg: if
         [categformatDAM]="TEST-<CATEG>-.*" and the reference is
         "TEST-CATEG1-2001-001" then the category will be recognized as
         "CATEG1".

       * decision_file: name of the file (in curdir) that holds the
         referee's decision; falls back to a file called 'decision'.
    """
    global rn, sysno
    doctype = form['doctype']
    act = form['act']
    categformat = parameters['categformatDAM']

    ## Get the name of the decision file:
    try:
        decision_filename = parameters['decision_file']
    except KeyError:
        decision_filename = ""

    pid = os.getpid()
    now = time.time()
    access = "%i_%s" % (now, pid)

    if act not in ["APP", "APS", "APM", "APO"]:
        ## Not an approval action: (re-)register the approval request.
        # retrieve category
        if re.search("<FILE:", categformat):
            # The category is stored in a file inside curdir.
            filename = categformat.replace("<FILE:", "")
            filename = filename.replace(">", "")
            if os.path.exists("%s/%s" % (curdir, filename)):
                fp = open("%s/%s" % (curdir, filename))
                category = fp.read()
                fp.close()
            else:
                category = ""
            category = category.replace("\n", "")
        else:
            # The category is embedded in the reference number.
            categformat = categformat.replace("<CATEG>", "([^-]*)")
            m_categ_search = re.match(categformat, rn)
            if m_categ_search is not None:
                if len(m_categ_search.groups()) > 0:
                    ## Found a match for the category of this document.
                    ## Get it:
                    category = m_categ_search.group(1)
                else:
                    ## This document has no category.
                    category = ""
            else:
                category = ""
        if category == "":
            category = "unknown"

        sth = run_sql("SELECT status,dFirstReq,dLastReq,dAction "
                      "FROM sbmAPPROVAL "
                      "WHERE doctype=%s and categ=%s and rn=%s",
                      (doctype, category, rn,))
        if len(sth) == 0:
            # First request for this document: create a waiting entry.
            run_sql("INSERT INTO sbmAPPROVAL "
                    "(doctype, categ, rn, status, dFirstReq, dLastReq, "
                    "dAction, access) "
                    "VALUES (%s,%s,%s,'waiting',NOW(),NOW(),'',%s)",
                    (doctype, category, rn, access,))
        else:
            # Repeated request: refresh the request date and status.
            run_sql("UPDATE sbmAPPROVAL SET dLastReq=NOW(), "
                    "status='waiting' "
                    "WHERE doctype=%s and categ=%s and rn=%s",
                    (doctype, category, rn,))
    else:
        ## Since this is the "APP" action, this call of the function must
        ## be on behalf of the referee - in order to approve or reject an
        ## item. We need to get the decision from the decision file:
        if decision_filename in (None, "", "NULL"):
            ## We don't have a name for the decision file.
            ## For backward compatibility reasons, try to read the decision
            ## from a file called 'decision' in curdir:
            if os.path.exists("%s/decision" % curdir):
                fh_decision = open("%s/decision" % curdir, "r")
                decision = fh_decision.read()
                fh_decision.close()
            else:
                decision = ""
        else:
            ## Try to read the decision from the decision file:
            try:
                fh_decision = open("%s/%s" % (curdir, decision_filename),
                                   "r")
                decision = fh_decision.read().strip()
                fh_decision.close()
            except IOError:
                ## Oops, unable to open the decision file.
                decision = ""

        from invenio.bibrecord import record_delete_field, \
            record_add_field, record_xml_output
        from invenio.bibedit_utils import get_bibrecord
        from invenio.bibtask import task_low_level_submission

        record = get_bibrecord(sysno)

        ## Either approve or reject the item, based upon the contents
        ## of 'decision':
        if decision == "approve":
            run_sql("UPDATE sbmAPPROVAL SET dAction=NOW(),"
                    "status='approved' WHERE rn=%s", (rn,))
        else:
            run_sql("UPDATE sbmAPPROVAL SET dAction=NOW(),"
                    "status='rejected' WHERE rn=%s", (rn,))
            # NOTE(review): placement of this block inside the rejection
            # branch is reconstructed from flattened source -- confirm.
            if act == "APS":
                # Re-flag the rejected blog record and re-run the upload,
                # indexing and collection jobs.
                record_delete_field(record, "980")
                record_add_field(record, '980', ' ', ' ', '',
                                 [('a', 'REJBLOG')])
                fd, name = tempfile.mkstemp(suffix='.xml', dir=CFG_TMPDIR)
                os.write(fd, """<collection>\n""")
                os.write(fd, record_xml_output(record))
                # BUGFIX: was """</collection\n>""", which wrote the
                # malformed tag "</collection\n>" and produced invalid
                # MARCXML for bibupload.
                os.write(fd, """</collection>\n""")
                os.close(fd)
                task_low_level_submission('bibupload', 'admin', '-c', name)
                task_low_level_submission('bibindex', 'admin')
                task_low_level_submission('webcoll', 'admin', '-c',
                                          "Provisional Blogs")
                task_low_level_submission('webcoll', 'admin', '-c',
                                          "Blogs")
    return ""
def perform_request_record(req, request_type, recid, uid, data, ln=CFG_SITE_LANG):
    """Handle 'major' record related requests like fetching, submitting or
    deleting a record, cancel editing or preparing a record for merging.

    @param req: the request object (only used to read session parameters
        for the tag-format setting)
    @param request_type: one of "newRecord", "getRecord", "submit",
        "revert", "cancel", "deleteRecord", "deleteRecordCache" or
        "prepareRecordMerge"
    @param recid: id of the record the request applies to
    @param uid: id of the user issuing the request
    @param data: request payload dict; the keys read depend on
        request_type (e.g. "newType", "cacheMTime", "force", "revId",
        "inReadOnlyMode", "clonedRecord", "deleteRecordCache")
    @param ln: language code, used when building the BibCirculation URL
    @return: a dict containing at least "resultCode"; extra keys depend on
        the request type (e.g. "newRecID", "record", "cacheOutdated")
    """
    response = {}

    if request_type == "newRecord":
        # Create a new record.
        new_recid = reserve_record_id()
        new_type = data["newType"]
        if new_type == "empty":
            # Create a new empty record.
            # NOTE(review): the cache is created for `recid`, not for the
            # freshly reserved `new_recid` as in the other two branches --
            # looks like a copy-paste slip; confirm against callers.
            create_cache_file(recid, uid)
            response["resultCode"], response["newRecID"] = 6, new_recid

        elif new_type == "template":
            # Create a new record from XML record template.
            template_filename = data["templateFilename"]
            template = get_record_template(template_filename)
            if not template:
                # Template file could not be found.
                response["resultCode"] = 108
            else:
                record = create_record(template)[0]
                if not record:
                    # Template contents did not parse into a record.
                    response["resultCode"] = 109
                else:
                    record_add_field(record, "001",
                                     controlfield_value=str(new_recid))
                    create_cache_file(new_recid, uid, record, True)
                    response["resultCode"], response["newRecID"] = 7, new_recid

        elif new_type == "clone":
            # Clone an existing record (from the users cache).
            existing_cache = cache_exists(recid, uid)
            if existing_cache:
                try:
                    record = get_cache_file_contents(recid, uid)[2]
                except:
                    # if, for example, the cache format was wrong (outdated)
                    # NOTE(review): bare except deliberately swallows any
                    # cache-parsing failure and falls back to the DB copy.
                    record = get_bibrecord(recid)
            else:
                # Cache missing. Fall back to using original version.
                record = get_bibrecord(recid)
            # Replace the clone's record id with the newly reserved one.
            record_delete_field(record, "001")
            record_add_field(record, "001",
                             controlfield_value=str(new_recid))
            create_cache_file(new_recid, uid, record, True)
            response["resultCode"], response["newRecID"] = 8, new_recid

    elif request_type == "getRecord":
        # Fetch the record. Possible error situations:
        # - Non-existing record
        # - Deleted record
        # - Record locked by other user
        # - Record locked by queue
        # A cache file will be created if it does not exist.
        # If the cache is outdated (i.e., not based on the latest DB
        # revision), cacheOutdated will be set to True in the response.
        record_status = record_exists(recid)
        existing_cache = cache_exists(recid, uid)
        read_only_mode = False
        if data.has_key("inReadOnlyMode"):
            read_only_mode = data["inReadOnlyMode"]

        if record_status == 0:
            # Record does not exist.
            response["resultCode"] = 102
        elif record_status == -1:
            # Record is marked deleted.
            response["resultCode"] = 103
        elif not read_only_mode and not existing_cache and \
                record_locked_by_other_user(recid, uid):
            response["resultCode"] = 104
        elif (
            not read_only_mode
            and existing_cache
            and cache_expired(recid, uid)
            and record_locked_by_other_user(recid, uid)
        ):
            # Our cache has expired and someone else holds the lock now.
            response["resultCode"] = 104
        elif not read_only_mode and record_locked_by_queue(recid):
            response["resultCode"] = 105
        else:
            if data.get("deleteRecordCache"):
                # Client asked for a fresh start: drop the existing cache.
                delete_cache_file(recid, uid)
                existing_cache = False
                pending_changes = []
                disabled_hp_changes = {}
            if read_only_mode:
                if data.has_key("recordRevision"):
                    # Viewing a specific historic revision -- rebuild the
                    # record from the archived MARCXML of that revision.
                    record_revision_ts = data["recordRevision"]
                    record_xml = get_marcxml_of_revision(recid,
                                                         record_revision_ts)
                    record = create_record(record_xml)[0]
                    record_revision = timestamp_to_revision(record_revision_ts)
                    pending_changes = []
                    disabled_hp_changes = {}
                else:
                    # a normal cacheless retrieval of a record
                    record = get_bibrecord(recid)
                    record_revision = get_record_last_modification_date(recid)
                    pending_changes = []
                    disabled_hp_changes = {}
                cache_dirty = False
                mtime = 0
                undo_list = []
                redo_list = []
            elif not existing_cache:
                # No cache yet -- create one from the DB copy.
                record_revision, record = create_cache_file(recid, uid)
                mtime = get_cache_mtime(recid, uid)
                pending_changes = []
                disabled_hp_changes = {}
                undo_list = []
                redo_list = []
                cache_dirty = False
            else:
                # TODO: This try except should be replaced with something
                # nicer, like an argument indicating if a new cache file
                # is to be created
                try:
                    cache_dirty, record_revision, record, pending_changes, \
                        disabled_hp_changes, undo_list, redo_list = \
                        get_cache_file_contents(recid, uid)
                    touch_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    if not latest_record_revision(recid, record_revision) and \
                            get_record_revisions(recid) != ():
                        # This sould prevent from using old cache in case of
                        # viewing old version. If there are no revisions,
                        # it means we should skip this step because this
                        # is a new record
                        response["cacheOutdated"] = True
                except:
                    # NOTE(review): bare except -- any failure to read the
                    # cache silently rebuilds it from the DB copy.
                    record_revision, record = create_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    pending_changes = []
                    disabled_hp_changes = {}
                    cache_dirty = False
                    undo_list = []
                    redo_list = []

            if data["clonedRecord"]:
                response["resultCode"] = 9
            else:
                response["resultCode"] = 3

            # Collect revision / circulation metadata for the editor UI.
            revision_author = get_record_revision_author(recid,
                                                         record_revision)
            last_revision_ts = revision_to_timestamp(
                get_record_last_modification_date(recid))
            revisions_history = get_record_revision_timestamps(recid)
            number_of_physical_copies = get_number_copies(recid)
            bibcirc_details_URL = create_item_details_url(recid, ln)
            can_have_copies = can_record_have_physical_copies(recid)
            response["cacheDirty"], response["record"], \
                response["cacheMTime"], response["recordRevision"], \
                response["revisionAuthor"], response["lastRevision"], \
                response["revisionsHistory"], response["inReadOnlyMode"], \
                response["pendingHpChanges"], response["disabledHpChanges"], \
                response["undoList"], response["redoList"] = (
                    cache_dirty,
                    record,
                    mtime,
                    revision_to_timestamp(record_revision),
                    revision_author,
                    last_revision_ts,
                    revisions_history,
                    read_only_mode,
                    pending_changes,
                    disabled_hp_changes,
                    undo_list,
                    redo_list,
                )
            response["numberOfCopies"] = number_of_physical_copies
            response["bibCirculationUrl"] = bibcirc_details_URL
            response["canRecordHavePhysicalCopies"] = can_have_copies
            # Set tag format from user's session settings.
            try:
                tagformat_settings = session_param_get(req,
                                                       "bibedit_tagformat")
                tagformat = tagformat_settings[recid]
            except KeyError:
                # No per-record setting stored -- fall back to the default.
                tagformat = CFG_BIBEDIT_TAG_FORMAT
            response["tagFormat"] = tagformat

    elif request_type == "submit":
        # Submit the record. Possible error situations:
        # - Missing cache file
        # - Cache file modified in other editor
        # - Record locked by other user
        # - Record locked by queue
        # - Invalid XML characters
        # If the cache is outdated cacheOutdated will be set to True in the
        # response.
        if not cache_exists(recid, uid):
            response["resultCode"] = 106
        elif not get_cache_mtime(recid, uid) == data["cacheMTime"]:
            # Cache was touched by another editor since the client read it.
            response["resultCode"] = 107
        elif cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response["resultCode"] = 104
        elif record_locked_by_queue(recid):
            response["resultCode"] = 105
        else:
            try:
                tmp_result = get_cache_file_contents(recid, uid)
                record_revision = tmp_result[1]
                record = tmp_result[2]
                pending_changes = tmp_result[3]
                # disabled_changes = tmp_result[4]
                # Round-trip through MARCXML to validate the record before
                # saving it.
                xml_record = print_rec(record)
                record, status_code, list_of_errors = \
                    create_record(xml_record)
                if status_code == 0:
                    # Record failed to parse back -- report the errors.
                    response["resultCode"], response["errors"] = \
                        110, list_of_errors
                elif not data["force"] and \
                        not latest_record_revision(recid, record_revision):
                    # DB moved on since this cache was taken; ask the client
                    # to resolve unless it explicitly forces the submit.
                    response["cacheOutdated"] = True
                else:
                    save_xml_record(recid, uid)
                    response["resultCode"] = 4
            except:
                # NOTE(review): bare except maps every failure to the
                # "wrong cache file format" result code.
                response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                    "error_wrong_cache_file_format"]

    elif request_type == "revert":
        # Roll the record back to a previous revision identified by revId.
        revId = data["revId"]
        job_date = "%s-%s-%s %s:%s:%s" % \
            re_revdate_split.search(revId).groups()
        revision_xml = get_marcxml_of_revision(recid, job_date)
        save_xml_record(recid, uid, revision_xml)
        if cache_exists(recid, uid):
            delete_cache_file(recid, uid)
        response["resultCode"] = 4

    elif request_type == "cancel":
        # Cancel editing by deleting the cache file. Possible error situations:
        # - Cache file modified in other editor
        if cache_exists(recid, uid):
            if get_cache_mtime(recid, uid) == data["cacheMTime"]:
                delete_cache_file(recid, uid)
                response["resultCode"] = 5
            else:
                # Someone else modified the cache meanwhile -- refuse.
                response["resultCode"] = 107
        else:
            response["resultCode"] = 5

    elif request_type == "deleteRecord":
        # Delete the record. Possible error situations:
        # - Record locked by other user
        # - Record locked by queue
        # As the user is requesting deletion we proceed even if the cache file
        # is missing and we don't check if the cache is outdated or has
        # been modified in another editor.
        existing_cache = cache_exists(recid, uid)
        pending_changes = []
        if has_copies(recid):
            response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                "error_physical_copies_exist"]
        elif existing_cache and cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                "error_rec_locked_by_user"]
        elif record_locked_by_queue(recid):
            response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                "error_rec_locked_by_queue"]
        else:
            if not existing_cache:
                record_revision, record, pending_changes, \
                    deactivated_hp_changes, undo_list, redo_list = \
                    create_cache_file(recid, uid)
            else:
                try:
                    record_revision, record, pending_changes, \
                        deactivated_hp_changes, undo_list, redo_list = \
                        get_cache_file_contents(recid, uid)[1:]
                except:
                    # NOTE(review): this fallback unpacks only four values
                    # from create_cache_file, while the branch above expects
                    # six -- confirm create_cache_file's return arity; the
                    # missing undo/redo lists are re-assigned just below, so
                    # only the unpack itself is at risk.
                    record_revision, record, pending_changes, \
                        deactivated_hp_changes = \
                        create_cache_file(recid, uid)
            # Mark the record as deleted by adding the 980__c DELETED flag.
            record_add_field(record, "980", " ", " ", "",
                             [("c", "DELETED")])
            undo_list = []
            redo_list = []
            update_cache_file_contents(
                recid, uid, record_revision, record, pending_changes,
                deactivated_hp_changes, undo_list, redo_list)
            save_xml_record(recid, uid)
            delete_related_holdingpen_changes(recid)  # we don't need any
            # changes related to a deleted record
            response["resultCode"] = 10

    elif request_type == "deleteRecordCache":
        # Delete the cache file. Ignore the request if the cache has been
        # modified in another editor.
        if cache_exists(recid, uid) and \
                get_cache_mtime(recid, uid) == data["cacheMTime"]:
            delete_cache_file(recid, uid)
        response["resultCode"] = 11

    elif request_type == "prepareRecordMerge":
        # We want to merge the cache with the current DB version of the
        # record, so prepare an XML file from the file cache, to be used
        # by BibMerge.
        # Possible error situations:
        # - Missing cache file
        # - Record locked by other user
        # - Record locked by queue
        # We don't check if cache is outdated (a likely scenario for this
        # request) or if it has been modified in another editor.
        if not cache_exists(recid, uid):
            response["resultCode"] = 106
        elif cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response["resultCode"] = 104
        elif record_locked_by_queue(recid):
            response["resultCode"] = 105
        else:
            save_xml_record(recid, uid, to_upload=False, to_merge=True)
            response["resultCode"] = 12

    return response
def _next_value(self, recid=None, xml_record=None, bibrecord=None):
    """Return the next texkey for the given record.

    The record may be supplied as a recid, as raw MARCXML, or as an
    already-parsed bibrecord structure (checked in that order).

    @param recid: id of the record where the texkey will be generated
    @type recid: int
    @param xml_record: record in xml format
    @type xml_record: string
    @param bibrecord: already-parsed record structure (used as-is)
    @return: next texkey for the given record
    @rtype: string
    @raises TexkeyNoAuthorError: no usable main author (100__a),
        collaboration (710__g) or corporate author (110__a) was found
    @raises TexkeyNoYearError: no year could be extracted from the
        269/260/773/502 date fields
    """
    if recid is None and xml_record is not None:
        bibrecord = create_record(xml_record)[0]
    elif bibrecord is None:
        bibrecord = get_bibrecord(recid)

    main_author = record_get_field_value(bibrecord, tag="100",
                                         ind1="", ind2="", code="a")
    if not main_author:
        # Try with collaboration name, dropping the word "collaboration"
        main_author = record_get_field_value(bibrecord, tag="710",
                                             ind1="", ind2="", code="g")
        main_author = "".join([p for p in main_author.split()
                               if p.lower() != "collaboration"])
        if not main_author:
            # Try with corporate author
            main_author = record_get_field_value(bibrecord, tag="110",
                                                 ind1="", ind2="", code="a")
            if not main_author:
                # Check if it is a Proceedings record
                collections = [collection.lower() for collection in
                               record_get_field_values(bibrecord, "980",
                                                       code="a")]
                if "proceedings" in collections:
                    main_author = "Proceedings"
                else:
                    raise TexkeyNoAuthorError

    # Remove utf-8 special characters
    main_author = unidecode(main_author.decode('utf-8'))

    # Keep only the surname (text before the first comma).  str.split
    # always returns at least one element, so this cannot raise; the
    # original wrapped it in a dead "except KeyError" handler, removed
    # here.  The real guard is the letter check below.
    texkey_first_part = main_author.split(',')[0]

    # sanitize for texkey use, require at least one letter
    texkey_first_part = re.sub(r'[^-A-Za-z0-9.:/^_;&*<>?|!$+]', '',
                               texkey_first_part)
    if len(texkey_first_part) < 1 \
            or not re.search(r'[A-Za-z]', texkey_first_part):
        raise TexkeyNoAuthorError

    # Find a publication year: preprint date (269__c), then imprint date
    # (260__c), then journal year (773__y), then thesis date (502__d).
    year = _get_year(
        record_get_field_value(bibrecord, tag="269",
                               ind1="", ind2="", code="c"))
    if not year:
        year = _get_year(
            record_get_field_value(bibrecord, tag="260",
                                   ind1="", ind2="", code="c"))
    if not year:
        year = _get_year(
            record_get_field_value(bibrecord, tag="773",
                                   ind1="", ind2="", code="y"))
    if not year:
        year = _get_year(
            record_get_field_value(bibrecord, tag="502",
                                   ind1="", ind2="", code="d"))
    if not year:
        raise TexkeyNoYearError
    # year is guaranteed truthy here (we raised otherwise), so the
    # original's "if year:" guard was redundant.
    texkey_second_part = year

    texkey_third_part = _texkey_random_chars(recid)
    texkey = "%s:%s%s" % \
        (texkey_first_part, texkey_second_part, texkey_third_part)

    tries = 0
    while self._value_exists(texkey) and tries < TEXKEY_MAXTRIES:
        # Key is already in the DB, generate a new random suffix
        texkey_third_part = _texkey_random_chars(recid, use_random=True)
        texkey = "%s:%s%s" % \
            (texkey_first_part, texkey_second_part, texkey_third_part)
        tries += 1
    return texkey
def main(): # search_term = raw_input('INSPIRE search: ') search = perform_request_search(p=search_term, cc='HEP') for r in search: print "Working on", r PAPERS[r] = {'People':{'Student person IDs':[], 'Author person IDs':[]}, 'Citation logs':{'Including self-cites':{'Total':{}, 'HEP-EX':{}, 'HEP-TH':{}, 'Q1':{}, 'Q2':{}, 'Q3':{}, 'Q4':{}}, 'Excluding self-cites':{'Total':{}, 'HEP-EX': {}, 'HEP-TH':{}, 'Q1':{}, 'Q2':{}, 'Q3':{}, 'Q4':{}}}} # Get pids for authors of a paper. Is there a way to associate a pid with an author name? PAPERS[r]['People']['Author person IDs'] =[val for _, val in get_personid_signature_association_for_paper(r).iteritems()] canonical_names = [] # Get BAI of pid for pid in PAPERS[r]['People']['Author person IDs']: foo = get_canonical_name_of_author(pid) for x in foo: for y in x: canonical_names.append(y) # Find BAI in HEPNames, get INSPIRE-ID, find students of author, get BAIS, convert to pids, add to dict for bai in canonical_names: bai_search = perform_request_search(p='035__a:%s' % bai, cc='HepNames') if len(bai_search) == 1: for person in bai_search: record = get_bibrecord(person) inspireid = record_get_field_values(record, '035', code='a', filter_subfield_code='9', filter_subfield_value='INSPIRE') if inspireid: student_search = perform_request_search(p='701__i:%s' % inspireid[0], cc='HepNames') if len(student_search) > 0: for student in student_search: srecord = get_bibrecord(student) sbai = record_get_field_values(srecord, '035', code='a', filter_subfield_code='9', filter_subfield_value='BAI') if sbai: try: student_pid = int(get_author_by_canonical_name(sbai)[0][0]) PAPERS[r]['People']['Student person IDs'].append(student_pid) except IndexError: pass dates = [] # Get total citations of paper cite_search = perform_request_search(p='refersto:recid:%i collection:published ' % r, cc='HEP') for c in cite_search: xciteself = True xciteprof = True # Get pids of citing authors, indicate whether citing paper is a self-cite citing_pids = [val for 
_, val in get_personid_signature_association_for_paper(c).iteritems()] # print 'authors', PAPERS[r]['People']['Author person IDs'] # print 'students', PAPERS[r]['People']['Student person IDs'] # print 'citing pids', citing_pids # print PAPERS[r] if not any(author in citing_pids for author in PAPERS[r]['People']['Author person IDs']): xciteself = False if not any(author in citing_pids for author in PAPERS[r]['People']['Student person IDs']): xciteprof = False date = get_date(c) dates.append(date) if date in PAPERS[r]['Citation logs']['Including self-cites']['Total']: PAPERS[r]['Citation logs']['Including self-cites']['Total'][date] += 1 else: PAPERS[r]['Citation logs']['Including self-cites']['Total'][date] = 1 if xciteself or xciteprof: if date in PAPERS[r]['Citation logs']['Excluding self-cites']['Total']: PAPERS[r]['Citation logs']['Excluding self-cites']['Total'][date] += 1 else: PAPERS[r]['Citation logs']['Excluding self-cites']['Total'][date] = 1 # Get hep-ex or hep-th citations of paper fieldcode = get_fieldcode(c) if fieldcode: if date in PAPERS[r]['Citation logs']['Including self-cites'][fieldcode]: PAPERS[r]['Citation logs']['Including self-cites'][fieldcode][date] += 1 else: PAPERS[r]['Citation logs']['Including self-cites'][fieldcode][date] = 1 if xciteself or xciteprof: if date in PAPERS[r]['Citation logs']['Excluding self-cites'][fieldcode]: PAPERS[r]['Citation logs']['Excluding self-cites'][fieldcode][date] += 1 else: PAPERS[r]['Citation logs']['Excluding self-cites'][fieldcode][date] = 1 # Separate Q1-4 citations journal_group = get_journal(r) if journal_group: if date in PAPERS[r]['Citation logs']['Including self-cites'][journal_group]: PAPERS[r]['Citation logs']['Including self-cites'][journal_group][date] += 1 else: PAPERS[r]['Citation logs']['Including self-cites'][journal_group][date] = 1 if xciteself or xciteprof: if date in PAPERS[r]['Citation logs']['Excluding self-cites'][journal_group]: PAPERS[r]['Citation logs']['Excluding 
self-cites'][journal_group][date] += 1 else: PAPERS[r]['Citation logs']['Excluding self-cites'][journal_group][date] = 1 # put data in CSV format # csv_output = [] for key, val in PAPERS.iteritems(): #get average cites/year total_avg = 0 hepex_avg = 0 hepth_avg = 0 Q1_avg = 0 Q2_avg = 0 Q3_avg = 0 Q4_avg = 0 xtotal_avg = 0 xhepex_avg = 0 xhepth_avg = 0 xQ1_avg = 0 xQ2_avg = 0 xQ3_avg = 0 xQ4_avg = 0 if sum(PAPERS[key]['Citation logs']['Including self-cites']['Total'].values()) > 0: total_avg = sum(PAPERS[key]['Citation logs']['Including self-cites']['Total'].values())/len(PAPERS[key]['Citation logs']['Including self-cites']['Total']) if sum(PAPERS[key]['Citation logs']['Including self-cites']['HEP-EX'].values()) > 0: hepex_avg = sum(PAPERS[key]['Citation logs']['Including self-cites']['HEP-EX'].values())/len(PAPERS[key]['Citation logs']['Including self-cites']['HEP-EX']) if sum(PAPERS[key]['Citation logs']['Including self-cites']['HEP-TH'].values()) > 0: hepth_avg = sum(PAPERS[key]['Citation logs']['Including self-cites']['HEP-TH'].values())/len(PAPERS[key]['Citation logs']['Including self-cites']['HEP-TH']) if sum(PAPERS[key]['Citation logs']['Including self-cites']['Q1'].values()) > 0: Q1_avg = sum(PAPERS[key]['Citation logs']['Including self-cites']['Q1'].values())/len(PAPERS[key]['Citation logs']['Including self-cites']['Q1']) if sum(PAPERS[key]['Citation logs']['Including self-cites']['Q2'].values()) > 0: Q2_avg = sum(PAPERS[key]['Citation logs']['Including self-cites']['Q2'].values())/len(PAPERS[key]['Citation logs']['Including self-cites']['Q2']) if sum(PAPERS[key]['Citation logs']['Including self-cites']['Q3'].values()) > 0: Q3_avg = sum(PAPERS[key]['Citation logs']['Including self-cites']['Q3'].values())/len(PAPERS[key]['Citation logs']['Including self-cites']['Q3']) if sum(PAPERS[key]['Citation logs']['Including self-cites']['Q4'].values()) > 0: Q4_avg = sum(PAPERS[key]['Citation logs']['Including self-cites']['Q4'].values())/len(PAPERS[key]['Citation 
logs']['Including self-cites']['Q4']) if sum(PAPERS[key]['Citation logs']['Excluding self-cites']['Total'].values()) > 0: xtotal_avg = sum(PAPERS[key]['Citation logs']['Excluding self-cites']['Total'].values())/len(PAPERS[key]['Citation logs']['Excluding self-cites']['Total']) if sum(PAPERS[key]['Citation logs']['Excluding self-cites']['HEP-EX'].values()) > 0: xhepex_avg = sum(PAPERS[key]['Citation logs']['Excluding self-cites']['HEP-EX'].values())/len(PAPERS[key]['Citation logs']['Excluding self-cites']['HEP-EX']) if sum(PAPERS[key]['Citation logs']['Excluding self-cites']['HEP-TH'].values()) > 0: xhepth_avg = sum(PAPERS[key]['Citation logs']['Excluding self-cites']['HEP-TH'].values())/len(PAPERS[key]['Citation logs']['Excluding self-cites']['HEP-TH']) if sum(PAPERS[key]['Citation logs']['Excluding self-cites']['Q1'].values()) > 0: xQ1_avg = sum(PAPERS[key]['Citation logs']['Excluding self-cites']['Q1'].values())/len(PAPERS[key]['Citation logs']['Excluding self-cites']['Q1']) if sum(PAPERS[key]['Citation logs']['Excluding self-cites']['Q2'].values()) > 0: xQ2_avg = sum(PAPERS[key]['Citation logs']['Excluding self-cites']['Q2'].values())/len(PAPERS[key]['Citation logs']['Excluding self-cites']['Q2']) if sum(PAPERS[key]['Citation logs']['Excluding self-cites']['Q3'].values()) > 0: xQ3_avg = sum(PAPERS[key]['Citation logs']['Excluding self-cites']['Q3'].values())/len(PAPERS[key]['Citation logs']['Excluding self-cites']['Q3']) if sum(PAPERS[key]['Citation logs']['Excluding self-cites']['Q4'].values()) > 0: xQ4_avg = sum(PAPERS[key]['Citation logs']['Excluding self-cites']['Q4'].values())/len(PAPERS[key]['Citation logs']['Excluding self-cites']['Q4']) PAPERS[key]['Citation logs']['Including self-cites']['Total']['Average'] = total_avg PAPERS[key]['Citation logs']['Including self-cites']['HEP-EX']['Average'] = hepex_avg PAPERS[key]['Citation logs']['Including self-cites']['HEP-TH']['Average'] = hepth_avg PAPERS[key]['Citation logs']['Including 
self-cites']['Q1']['Average'] = Q1_avg PAPERS[key]['Citation logs']['Including self-cites']['Q2']['Average'] = Q2_avg PAPERS[key]['Citation logs']['Including self-cites']['Q3']['Average'] = Q3_avg PAPERS[key]['Citation logs']['Including self-cites']['Q4']['Average'] = Q4_avg PAPERS[key]['Citation logs']['Excluding self-cites']['Total']['Average'] = xtotal_avg PAPERS[key]['Citation logs']['Excluding self-cites']['HEP-EX']['Average'] = xhepex_avg PAPERS[key]['Citation logs']['Excluding self-cites']['HEP-TH']['Average'] = xhepth_avg PAPERS[key]['Citation logs']['Excluding self-cites']['Q1']['Average'] = xQ1_avg PAPERS[key]['Citation logs']['Excluding self-cites']['Q2']['Average'] = xQ2_avg PAPERS[key]['Citation logs']['Excluding self-cites']['Q3']['Average'] = xQ3_avg PAPERS[key]['Citation logs']['Excluding self-cites']['Q4']['Average'] = xQ4_avg with open('bubble_SUSY.dict', 'wb') as dict_out: dump(PAPERS, dict_out)