def insert_thesis_note(curdir):
    """Assemble the thesis note and thesis facet fields.

    Writes:
      502__a -- thesis note, syntax: 'University, Doctype, Granted Year'
      502__b -- normalized degree (if provided by the doctype authority)
      650_7* -- thesis facet subfields from the doctype authority

    The note is only written when university (502__c), normalized doctype
    (I502__a from the authority) and a year (granted year 502__d, or
    publication year 260__c as fallback) are all available.
    """
    doctype = read_file(curdir, "doctype")
    jsondict = get_pubtype_info(doctype)
    if "I502__b" in jsondict:
        write_file(curdir, "hgf_502__b", jsondict["I502__b"])
    # All three components must be present, otherwise skip the note.
    have_university = check_field_exists(curdir, "hgf_502__c")
    have_year = (check_field_exists(curdir, "hgf_260__c")
                 or check_field_exists(curdir, "hgf_502__d"))
    if not (have_university and have_year):
        return  # if some field is missing, do not create thesis_note
    if "I502__a" not in jsondict:
        return  # if some field is missing, do not create thesis_note
    norm_doctype = jsondict["I502__a"]
    if check_field_exists(curdir, "hgf_502__d"):
        year = read_file(curdir, "hgf_502__d")  # use granted year
    else:
        year = read_file(curdir, "hgf_260__c")  # use publication year
    thesis_note = read_file(curdir, "hgf_502__c") + ", " + norm_doctype + ", " + year
    if "I650_7a" in jsondict:
        write_file(curdir, "hgf_650_7a", jsondict["I650_7a"].encode('utf-8'))
    if "I650_72" in jsondict:
        write_file(curdir, "hgf_650_72", jsondict["I650_72"])
    if "I650_70" in jsondict:
        write_file(curdir, "hgf_650_70", jsondict["I650_70"])
    write_file(curdir, "hgf_650_7x", norm_doctype)
    write_file(curdir, "hgf_502__a", thesis_note)
def insert_webyear(curdir):
    """Set the web year (Wissenschaftlicher Ergebnis Berichtsjahr).

    Must be called after the insert_date function, because it parses the
    'start - end' strings that insert_date produced.
    """
    try:
        recid = int(get_recordid(curdir))
    except:
        return  # when do we get this exception???
    # create_hgf_collection was already active at this step and changed the
    # 980 field, so fetch the record's original collections from the database.
    original_collections = get_fieldvalues(recid, '980__a')
    if "VDB" in original_collections:
        # do not change web_year after release by the library (collection VDB)
        return
    current_year = str(datetime.datetime.now().year)
    web_year = None
    if check_field_exists(curdir, "hgf_260__c"):  # publication year exists
        pub_year = read_file(curdir, "hgf_260__c")
        if pub_year == current_year:
            # publication year is current system year --> set web-year
            web_year = pub_year
        elif check_field_exists(curdir, "hgf_245__f"):  # check thesis end_date
            # insert_date has already been executed, field is 'start - end'
            sdate, edate = read_file(curdir, "hgf_245__f").split(" - ")
            if current_year in sdate or current_year in edate:
                # thesis period touches current system year --> set web-year
                web_year = current_year
    if check_field_exists(curdir, "hgf_1112_d"):  # check conference end_date
        sdate, edate = read_file(curdir, "hgf_1112_d").split(" - ")
        if current_year in sdate or current_year in edate:
            # conference period touches current system year --> set web-year
            web_year = current_year
    if web_year:
        write_file(curdir, "hgf_9141_y", web_year)
def handle_0247(curdir):
    """Handle persistent identifiers in 0247_ (user input only).

    Sets $2 to the source/type and $a to the value, collecting patents,
    URNs and DOIs. DOIs are mirrored between 773__a and 0247_.
    Note: if we get new PIDs that should be handled we need to adopt
    this function!
    """
    if check_field_exists(curdir, "hgf_0247_"):
        listdict_ = read_json(curdir, "hgf_0247_")
    else:
        listdict_ = []
    if check_field_exists(curdir, "hgf_0247_a2pat"):  # Patent
        listdict_.append({"2": "Patent", "a": read_file(curdir, "hgf_0247_a2pat")})
    if check_field_exists(curdir, "hgf_0247_a2urn"):  # URN
        listdict_.append({"2": "URN", "a": read_file(curdir, "hgf_0247_a2urn")})
    if check_field_exists(curdir, "hgf_773__a"):
        # store DOI in both 773__ and 0247_; this is an input field
        listdict_.append({"2": "doi", "a": read_file(curdir, "hgf_773__a")})
    elif check_field_exists(curdir, "hgf_773__"):
        # DOI can be stored in the 773__ json array instead
        for ent in read_json(curdir, "hgf_773__"):  # more than 1 DOI possible
            if "a" not in ent:
                continue
            listdict_.append({"2": "doi", "a": ent["a"]})
    if listdict_ == []:
        return
    # remove double entries while keeping first-seen order
    new_listdict = []
    for entry in listdict_:
        if entry in new_listdict:
            continue
        new_listdict.append(entry)
    write_json(curdir, "hgf_0247_", new_listdict)
    # Insert DOI into 773__a only in case no 773__a or 773 json array
    # with a $a subfield exists yet.
    if check_field_exists(curdir, "773__a"):
        return  # we have a 773__a
    if check_field_exists(curdir, "773__"):
        for ent in read_json(curdir, "773__"):
            if "a" in ent:
                return  # we have a 773__a
    for ent in new_listdict:
        if ent.get("2") != "doi":
            continue
        # write DOI in 773__a if we do not yet have one.
        # in case of multiple DOIs the first one will win <--> we cannot
        # write the 773__ because we do not know if other 773__* fields
        # has been inputted and to which belongs the DOI. TODO!
        write_file(curdir, "hgf_773__a", ent["a"])
        return
def insert_inst_into_980(curdir,uid):
    """collection handling for institutes

    Ensures each submitter institute (hgf_9201_) is mirrored as an {"a": ...}
    entry in the hgf_980__ collection list. If no institute was chosen, the
    submitter's own institutes (derived from the user groups) are used; the
    admin user (uid 1) is exempt from all institute handling.
    """
    user_groups = get_usergroups(uid)
    if check_field_exists(curdir,"hgf_9201_"):
        if read_file(curdir,"hgf_9201_") == "[]":
            remove_file(curdir,"hgf_9201_") # delete file in case of empty sequence! TODO: this should not happen and has to be fixed in hgfInstitutes.js
    if not check_field_exists(curdir,"hgf_9201_"): #make sure that we have at least one institute
        if str(uid) == "1": return #do not add institutes for admin
        user_insts = extract_user_institutes("0",user_groups)
        if user_insts == []:
            # submitter has no institute assignment at all -> alert the admin
            email_txt = "%s is not assigned to any institute. This email was generated from Covert_hgf_fields and function insert_inst_into_980" %get_recordid(curdir)
            send_email(CFG_SITE_ADMIN_EMAIL, CFG_SITE_ADMIN_EMAIL, "ERROR: no institute assigned", email_txt,header="",html_header="")
            return #this should not happen!
        jsondict = user_insts #add institute even if no institute chosen to be inserted into 980
    else:
        jsondict = read_json(curdir,"hgf_9201_")
    inst_list = []
    list_980 = read_json(curdir,"hgf_980__")
    # collect institute ids (subfield "0") not yet present in 980
    for inst in jsondict:
        if {"a":inst["0"]} in list_980: continue
        inst_list.append({"a":inst["0"]})
    if inst_list == []: return  # nothing new to add -> leave 980 untouched
    list_980 += inst_list
    #check if users institut in 980, if not take it from user_info
    if str(uid) == "1": pass # no adding of institutes into 980 for admin
    else:
        # NOTE(review): the comparison below relies on the exact str() repr of
        # the 980 dicts matching strings in user_groups and the literal
        # "{'a': 'VDB'}" forms -- fragile, but kept as-is; confirm before changing.
        str_list_980 = [str(i) for i in list_980] #convert list with dicts into list with str(dicts), because python sets use list with strings
        intersection_groups = set(str_list_980) & set(user_groups) # user institute not in 980 yet
        intersection_vdb = set(["{'a': 'VDB'}", "{'a': 'VDBRELEVANT'}","{'a': 'VDBINPRINT'}"]) & set(str_list_980) # not vdb_relevant
        if intersection_groups == set([]) and intersection_vdb == set([]): # # prevent submitting vdb irrelevant stuff for another institute
            list_980 += extract_user_institutes("a",user_groups)
    write_json(curdir,"hgf_980__",list_980)
def insert_date(curdir,fielddate,sdate,edate):
    """Join separate start/end date files into one 'start - end' field.

    fielddate can be hgf_245__f, hgf_1112_d
    sdate: hgf_245__fs or hgf_1112_dcs
    edate: hgf_245__fe or hgf_1112_dce
    """
    start = read_file(curdir, sdate) if check_field_exists(curdir, sdate) else ""
    end = read_file(curdir, edate) if check_field_exists(curdir, edate) else ""
    if start == "" and end == "":
        return ""
    # combined field keeps the fixed ' - ' separator expected by handle_date
    write_file(curdir, fielddate, start + " - " + end)
    remove_file(curdir, sdate)
    remove_file(curdir, edate)
def prefill_vdb_relevant(curdir):
    """Prefill the hgf_vdb yes/no flag (and the massmedia flag) from 980."""
    if not check_field_exists(curdir, "hgf_980__"):
        return
    text = read_file(curdir, "hgf_980__")
    # any of these collection markers makes the record VDB relevant
    markers = ('VDBRELEVANT', '"VDB"', 'VDBINPRINT')
    value = "yes" if any(marker in text for marker in markers) else "no"
    write_file(curdir, "hgf_vdb", value)
    if 'MASSMEDIA' in text:  # prefill Massmedia
        write_file(curdir, "hgf_massmedia", "yes")
def add_field(self,fieldname):
    """Add a plain (non-json) field file to the record dictionary.

    Builds a structured field in self.data when several subfields exist
    as individual files: the file name (minus the 'hgf_' prefix) encodes
    the 5-character marc field followed by the subfield code.
    """
    content = read_file(self.curdir, fieldname)
    stripped = fieldname.replace("hgf_", "")
    marc_tag, subfield_code = stripped[:5], stripped[5]
    self.add_one_field(marc_tag, subfield_code, content)
def add_FFT(curdir):
    """ !!!move_files_to_storage, move files to done have to be deleted from websubmit function!!!
    add FFT tag into record: builds the hgf_FFT__ json structure (path, report
    number, access restriction) for the submitted fulltext file.
    if this function is used: the functions stamp_uploaded_files should not be
    used in the websubmit anymore.
    Empty uploads abort the file part of the submission and notify the
    submitter (or the admin) by email; the record itself is still processed.
    """
    if not check_field_exists(curdir,"hgf_file"): return None # no file submitted
    fulltext_filename = read_file(curdir,"hgf_file")
    fulltext_path = os.path.join(curdir,"files","hgf_file",fulltext_filename)
    if not os.path.exists(fulltext_path): return None # no file named in hgf_file in files directory. something is wrong..
    if os.path.getsize(fulltext_path) == 0: #check file size
        #send email
        #first get the url record link
        if not check_field_exists(curdir,"SN"): return None # no recid-->something is wrong..
        recid = get_recordid(curdir)
        rec_url = CFG_SITE_URL + "/record/" + recid
        #create email
        email_txt = 'Dear Sir or Madam, \n\nAn empty file has been submitted for the record: %s\n\nProbably it was caused, because the file has been deleted from its directory before final submission into %s !!!\nIt is possible, that the record itself is not available, when this email was sent, but it should be processed within minutes. Once this is finished you may add the fulltext by accessing %s and using "modify record" link \n\n' %(rec_url,CFG_SITE_NAME,rec_url)
        email_subject = 'File submission incomplete!!!'
        #email check
        if check_field_exists(curdir,"SuE"):
            email_to = read_file(curdir,"SuE") # get email from submitter
        else:
            email_to = CFG_SITE_ADMIN_EMAIL # in case somehow no email of submitter exists, send email to admin
        send_email(CFG_SITE_ADMIN_EMAIL, email_to, email_subject, email_txt,copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN,header="",html_header="")
        return None #cancel file submission (the submitter has already been informed via email), the original submission will be processed.
    inst_dict_list = read_json(curdir,"hgf_9201_") #read in institutes
    inst_list = []  # NOTE(review): never used below -- candidate for removal
    restriction = "firerole: allow groups 'STAFF'" # staff is always
    # add the institutes id and append the external auth info as this
    # builds the actual group name we need to allow here.
    for inst in inst_dict_list:
        restriction += ",'" + inst["0"] + ' ['+CFG_EXTERNAL_AUTH_DEFAULT+']' + "'" # TODO: multiple authentifications
    filename = read_file(curdir,"hgf_file")
    file_path = os.path.join(curdir,"files","hgf_file",filename)
    if not check_field_exists(curdir,"rn"): return  # report number required for FFT $n
    rn = read_file(curdir,"rn")
    #fill subfields for FFT: $a file path, $n new name, $r restriction
    fft_dict = {}
    fft_dict["a"] = file_path
    fft_dict["n"] = rn
    fft_dict["r"] = restriction
    write_json(curdir,"hgf_FFT__",fft_dict)
def prefill_0247(curdir):
    """Prefill the URN and Patent input fields from stored 0247_ data.

    Reads either the structured json field (hgf_0247_) or the single
    subfield pair (hgf_0247_2 / hgf_0247_a) and writes the type-specific
    input files hgf_0247_a2pat / hgf_0247_a2urn.
    """
    if check_field_exists(curdir, "hgf_0247_"):  # json structure
        for jsondict in read_json(curdir, "hgf_0247_"):
            if "2" not in jsondict:  # has_key is gone in Python 3
                continue
            if jsondict["2"] == "Patent":
                write_file(curdir, "hgf_0247_a2pat", jsondict["a"])
            elif jsondict["2"] == "URN":
                write_file(curdir, "hgf_0247_a2urn", jsondict["a"])
    if check_field_exists(curdir, "hgf_0247_2") and check_field_exists(curdir, "hgf_0247_a"):
        subfield_2 = read_file(curdir, "hgf_0247_2")
        subfield_a = read_file(curdir, "hgf_0247_a")
        if subfield_2 == "Patent":
            write_file(curdir, "hgf_0247_a2pat", subfield_a)
        elif subfield_2 == "URN":
            write_file(curdir, "hgf_0247_a2urn", subfield_a)
def handle_date(curdir,fielddate,sdate,edate):
    """Split a combined 'start - end' date field into separate files.

    Inverse of insert_date (used when prefilling the modify form).
    fielddate can be hgf_245__f, hgf_1112_d
    sdate: hgf_245__fs or hgf_1112_dcs
    edate: hgf_245__fe or hgf_1112_dce
    """
    if not check_field_exists(curdir, fielddate):
        return
    date = read_file(curdir, fielddate)
    try:
        dat1, dat2 = date.split(" - ")
    except ValueError:
        # not exactly one ' - ' separator -> not a combined date, leave as is
        return
    if dat1 != "":
        write_file(curdir, sdate, dat1)
    if dat2 != "":
        write_file(curdir, edate, dat2)
def write_mod_doctype(curdir):
    """write mod_doctype file to automatically connect to modification page

    Lists every field of the doctype's SBI submission page (except the
    technical hgf_start/hgf_end/hgf_master markers), one per line.
    """
    #TODO: do we need this function ???
    doctype = read_file(curdir, 'doctype')  # in initial dir, so avoid a global
    mod_doctype_path = os.path.join(curdir, "mod_" + doctype)
    tuple_fields = get_details_and_description_of_all_fields_on_submissionpage(doctype, "SBI", 1)
    # 'with' guarantees the file is closed even if a write raises
    with open(mod_doctype_path, "w") as mod_file:
        for _tuple in tuple_fields:
            field = _tuple[0]
            if field in ("hgf_start", "hgf_end", "hgf_master"):
                continue  # technical markers, not real input fields
            mod_file.write(field + "\n")
def handle_245(curdir):
    """Assemble the structured 245__ field from its parts.

    245__a (title) and 245__f (date) arrive as unstructured text input
    fields, while 245__h (publication form) may come as a structured json
    token-input field (AC's request). Merge everything into one json
    structure in hgf_245__ so the following workflow can transparently
    handle it as if it was passed as a structure in the first place.
    """
    # Get unstructured stuff
    title, date = "", ""
    if check_field_exists(curdir, "hgf_245__a"):
        title = read_file(curdir, "hgf_245__a")
    if check_field_exists(curdir, "hgf_245__f"):
        date = read_file(curdir, "hgf_245__f")
    jsondict = {}
    # Try to get what we have already in the structure as such
    if check_field_exists(curdir, "hgf_245__"):
        structured = read_json(curdir, "hgf_245__")
        # in case of multiple publication forms (???, should be non
        # repeatable, but just in case: create comma separated string)
        pubforms = [entry["h"] for entry in structured if 'h' in entry]
        pubstring = ", ".join(pubforms)
        if pubstring != "":
            jsondict = {"h": pubstring}
    # Add unstructured fields, if they exist
    if title != "":
        jsondict["a"] = title
    if date != "":
        jsondict["f"] = date
    # Write the full structured file
    write_json(curdir, "hgf_245__", jsondict)
def insert_3367(curdir):
    """Get doctype from authorities, create 3367 and set our doctypes in 980."""
    doctype = read_file(curdir, "doctype")
    access = read_file(curdir, "access")  # submission id
    subtype = ''
    try:
        # Check if we have a refinement of the doctype. Usually we have
        # this only for talks which could be "Invited" or whatever. If so,
        # add it to 3367_$x
        subtype = read_file(curdir, "hgf_3367_x")
    except:
        # Usually, we do not have refinements.
        pass
    doctype_dict = get_pubtype_info(doctype)
    if doctype_dict == {}:
        doctype_dict_list = [{"m": doctype}]  # no authority record
    else:
        # Run over the dictionary and build up a list of all document types.
        # Note that not all document types have to be hgf-types, they may as
        # well stem from other vocabularies (DINI/DRIVER...)
        doctype_dict_list = handle_list_of_doctype_dict(doctype_dict, access, doctype, subtype)
    doctype_dict_list = add_reportdoctype(curdir, doctype, doctype_dict_list)   # add intrep doctype
    doctype_dict_list = add_journaldoctype(curdir, doctype, doctype_dict_list)  # add journal doctype
    doctype_dict_list = add_bookdoctype(curdir, doctype, doctype_dict_list)     # add book doctype
    doctype_dict_list = add_procdoctype(curdir, doctype, doctype_dict_list)     # add proc doctype
    if check_field_exists(curdir, "hgf_980__"):
        list_980 = read_json(curdir, "hgf_980__")
    else:
        list_980 = []
    # Only add our own doctypes to 980 (ie collections and not DINI/DRIVER);
    # those are the entries that carry an "m" subfield.
    for dtype in doctype_dict_list:
        if "m" not in dtype:
            continue
        if {"a": dtype["m"]} in list_980:
            continue
        list_980.append({"a": dtype["m"]})
    write_json(curdir, "hgf_980__", list_980)
    write_json(curdir, "hgf_3367_", doctype_dict_list)
def handle_url(curdir):
    """Extract one external URL from the 8564_ structure into 8564_u.

    Only one URL can be submitted/modified (bibedit urls die ;) and
    internal file links (containing CFG_SITE_URL) are skipped/removed.
    """
    if check_field_exists(curdir, "hgf_8564_"):
        remove_file(curdir, "hgf_8564_u")
        jsondict_list = read_json(curdir, "hgf_8564_")
        for entry in jsondict_list:
            if "u" not in entry:  # was has_key -- gone in Python 3
                continue  # no subfield u detected
            if CFG_SITE_URL in entry["u"]:
                continue  # skip internal file
            write_file(curdir, "hgf_8564_u", entry["u"])
            remove_file(curdir, "hgf_8564_")
            return  # write only one URL
    if check_field_exists(curdir, "hgf_8564_u"):
        text = read_file(curdir, "hgf_8564_u")
        if CFG_SITE_URL in text:
            remove_file(curdir, "hgf_8564_u")  # skip internal file
def handle_980(curdir):
    """Normalize the 980 collection list.

    Moves the original doctype to the front (needed by invenio, more
    likely a bug), removes duplicate entries, appends a DELETED marker
    when requested and applies collection restrictions.
    """
    list_980 = read_json(curdir, "hgf_980__")
    doctype = read_file(curdir, "doctype")
    # move original doctype to be first entry in 980 list
    list_980.insert(0, list_980.pop(list_980.index({"a": doctype})))
    new_list = []
    for entry in list_980:
        if entry not in new_list:  # remove double entries
            new_list.append(entry)
    if check_field_exists(curdir, "hgf_delete"):
        new_list.append({"c": "DELETED"})  # user wants to delete this record
    # set UNRESTRICTED if 980 collection appears in CFG_PUBLIC_COLLECTIONS
    new_list = set_restriction(new_list)
    write_json(curdir, "hgf_980__", new_list)
def handle_1001(curdir):
    """Add gender to 1001_.

    Technical field: 1001_ contains a list with a single dict holding the
    first author's name. Gender (used only for phd-like entries) is added
    to that dict. NOTE: for further processing the newly written technical
    field must not contain a real JSON structure, but again only this list
    of a single hash.
    """
    if not check_field_exists(curdir, "hgf_1001_g"):
        return  # no gender set
    if not check_field_exists(curdir, "hgf_1001_"):
        return
    author_list = read_json(curdir, "hgf_1001_")
    # 100 contains only one person -> attach gender to the first entry
    author_list[0]["g"] = read_file(curdir, "hgf_1001_g")
    write_json(curdir, "hgf_1001_", author_list)
def Prefill_hgf_fields(parameters, curdir, form, user_info=None): """extract all information from DB-record as json dict and write files into curdir""" # record_dict = get_record(sysno) #get record record_dict = get_record(read_file(curdir, 'SN')) #get record json_dict = wash_db_record_dict(record_dict) #create nice json dictionary json_dict = add_non_json_fields(json_dict) #add single input fields write_all_files(curdir,json_dict) # write all values to files write_done_file(curdir) #write done file--> cheat invenio prefill_245(curdir) prefill_0247(curdir) prefill_gender(curdir) handle_url(curdir) handle_date(curdir,"hgf_245__f","hgf_245__fs","hgf_245__fe") handle_date(curdir,"hgf_1112_d","hgf_1112_dcs","hgf_1112_dce") write_mod_doctype(curdir) prefill_vdb_relevant(curdir) handle_institutes_for_modify(curdir)
def check_vdb_relevant(curdir,fieldname):
    """Return True if the record is flagged VDB relevant, else None.

    NOTE(review): `fieldname` is only used for the existence check while
    the value is always read from 'hgf_vdb' -- presumably callers pass
    "hgf_vdb" here; confirm against callers before changing.
    """
    if not check_field_exists(curdir, fieldname):
        return None
    text = read_file(curdir, "hgf_vdb")
    if "yes" in text:
        return True
    return None
def insert_reportnr(curdir):
    """Copy the report number (rn file) into marc field 037__a."""
    write_file(curdir, "hgf_037__a", read_file(curdir, "rn"))
def insert_email(curdir):
    """Read SuE (email of submitter) file and store it in 8560_f."""
    if check_field_exists(curdir, "SuE"):
        write_file(curdir, "hgf_8560_f", read_file(curdir, "SuE"))
def check_9201(curdir):
    """Delete all hgf_9201_* institute files when the record is flagged
    as not VDB relevant (hgf_vdb == "no")."""
    if not check_field_exists(curdir, "hgf_vdb"):
        return
    if read_file(curdir, "hgf_vdb") != "no":
        return
    # Remove the files directly instead of shelling out to `rm -f` via
    # os.system, which breaks on paths containing spaces or shell
    # metacharacters and spawns a needless subshell.
    import glob
    for path in glob.glob(os.path.join(curdir, "hgf_9201_*")):
        try:
            os.remove(path)
        except OSError:
            pass  # keep the best-effort semantics of 'rm -f'