def insert_thesis_note(curdir):
    """Assemble the thesis note and thesis facet fields.

    Writes:
      502__a -- thesis note, syntax: 'University, Doctype, Granted Year'
      502__b -- normalized degree (if provided by the doctype authority)
      650_7* -- thesis facet subfields from the doctype authority

    The note is only written when university (502__c), normalized doctype
    (I502__a from the authority) and a year (granted year 502__d, or
    publication year 260__c as fallback) are all available.
    """
    doctype = read_file(curdir, "doctype")
    jsondict = get_pubtype_info(doctype)
    if "I502__b" in jsondict:
        write_file(curdir, "hgf_502__b", jsondict["I502__b"])
    # All three components must be present, otherwise skip the note.
    have_university = check_field_exists(curdir, "hgf_502__c")
    have_year = (check_field_exists(curdir, "hgf_260__c")
                 or check_field_exists(curdir, "hgf_502__d"))
    if not (have_university and have_year):
        return  # if some field is missing, do not create thesis_note
    if "I502__a" not in jsondict:
        return  # if some field is missing, do not create thesis_note
    norm_doctype = jsondict["I502__a"]
    if check_field_exists(curdir, "hgf_502__d"):
        year = read_file(curdir, "hgf_502__d")  # use granted year
    else:
        year = read_file(curdir, "hgf_260__c")  # use publication year
    thesis_note = read_file(curdir, "hgf_502__c") + ", " + norm_doctype + ", " + year
    if "I650_7a" in jsondict:
        write_file(curdir, "hgf_650_7a", jsondict["I650_7a"].encode('utf-8'))
    if "I650_72" in jsondict:
        write_file(curdir, "hgf_650_72", jsondict["I650_72"])
    if "I650_70" in jsondict:
        write_file(curdir, "hgf_650_70", jsondict["I650_70"])
    write_file(curdir, "hgf_650_7x", norm_doctype)
    write_file(curdir, "hgf_502__a", thesis_note)
def insert_webyear(curdir):
    """Set the web year (Wissenschaftlicher Ergebnis Berichtsjahr).

    Must be called after the insert_date function, because it parses the
    'start - end' strings that insert_date produced.
    """
    try:
        recid = int(get_recordid(curdir))
    except:
        return  # when do we get this exception???
    # create_hgf_collection was already active at this step and changed the
    # 980 field, so fetch the record's original collections from the database.
    original_collections = get_fieldvalues(recid, '980__a')
    if "VDB" in original_collections:
        # do not change web_year after release by the library (collection VDB)
        return
    current_year = str(datetime.datetime.now().year)
    web_year = None
    if check_field_exists(curdir, "hgf_260__c"):  # publication year exists
        pub_year = read_file(curdir, "hgf_260__c")
        if pub_year == current_year:
            # publication year is current system year --> set web-year
            web_year = pub_year
        elif check_field_exists(curdir, "hgf_245__f"):  # check thesis end_date
            # insert_date has already been executed, field is 'start - end'
            sdate, edate = read_file(curdir, "hgf_245__f").split(" - ")
            if current_year in sdate or current_year in edate:
                # thesis period touches current system year --> set web-year
                web_year = current_year
    if check_field_exists(curdir, "hgf_1112_d"):  # check conference end_date
        sdate, edate = read_file(curdir, "hgf_1112_d").split(" - ")
        if current_year in sdate or current_year in edate:
            # conference period touches current system year --> set web-year
            web_year = current_year
    if web_year:
        write_file(curdir, "hgf_9141_y", web_year)
def handle_0247(curdir):
    """Handle persistent identifiers in 0247_ (user input only).

    Sets $2 to the source/type and $a to the value, collecting patents,
    URNs and DOIs. DOIs are mirrored between 773__a and 0247_.
    Note: if we get new PIDs that should be handled we need to adopt
    this function!
    """
    if check_field_exists(curdir, "hgf_0247_"):
        listdict_ = read_json(curdir, "hgf_0247_")
    else:
        listdict_ = []
    if check_field_exists(curdir, "hgf_0247_a2pat"):  # Patent
        listdict_.append({"2": "Patent", "a": read_file(curdir, "hgf_0247_a2pat")})
    if check_field_exists(curdir, "hgf_0247_a2urn"):  # URN
        listdict_.append({"2": "URN", "a": read_file(curdir, "hgf_0247_a2urn")})
    if check_field_exists(curdir, "hgf_773__a"):
        # store DOI in both 773__ and 0247_; this is an input field
        listdict_.append({"2": "doi", "a": read_file(curdir, "hgf_773__a")})
    elif check_field_exists(curdir, "hgf_773__"):
        # DOI can be stored in the 773__ json array instead
        for ent in read_json(curdir, "hgf_773__"):  # more than 1 DOI possible
            if "a" not in ent:
                continue
            listdict_.append({"2": "doi", "a": ent["a"]})
    if listdict_ == []:
        return
    # remove double entries while keeping first-seen order
    new_listdict = []
    for entry in listdict_:
        if entry in new_listdict:
            continue
        new_listdict.append(entry)
    write_json(curdir, "hgf_0247_", new_listdict)
    # Insert DOI into 773__a only in case no 773__a or 773 json array
    # with a $a subfield exists yet.
    if check_field_exists(curdir, "773__a"):
        return  # we have a 773__a
    if check_field_exists(curdir, "773__"):
        for ent in read_json(curdir, "773__"):
            if "a" in ent:
                return  # we have a 773__a
    for ent in new_listdict:
        if ent.get("2") != "doi":
            continue
        # write DOI in 773__a if we do not yet have one.
        # in case of multiple DOIs the first one will win <--> we cannot
        # write the 773__ because we do not know if other 773__* fields
        # has been inputted and to which belongs the DOI. TODO!
        write_file(curdir, "hgf_773__a", ent["a"])
        return
def insert_inst_into_980(curdir,uid):
    """collection handling for institutes

    Ensures each submitter institute (hgf_9201_) is mirrored as an {"a": ...}
    entry in the hgf_980__ collection list. If no institute was chosen, the
    submitter's own institutes (derived from the user groups) are used; the
    admin user (uid 1) is exempt from all institute handling.
    """
    user_groups = get_usergroups(uid)
    if check_field_exists(curdir,"hgf_9201_"):
        if read_file(curdir,"hgf_9201_") == "[]":
            remove_file(curdir,"hgf_9201_") # delete file in case of empty sequence! TODO: this should not happen and has to be fixed in hgfInstitutes.js
    if not check_field_exists(curdir,"hgf_9201_"): #make sure that we have at least one institute
        if str(uid) == "1": return #do not add institutes for admin
        user_insts = extract_user_institutes("0",user_groups)
        if user_insts == []:
            # submitter has no institute assignment at all -> alert the admin
            email_txt = "%s is not assigned to any institute. This email was generated from Covert_hgf_fields and function insert_inst_into_980" %get_recordid(curdir)
            send_email(CFG_SITE_ADMIN_EMAIL, CFG_SITE_ADMIN_EMAIL, "ERROR: no institute assigned", email_txt,header="",html_header="")
            return #this should not happen!
        jsondict = user_insts #add institute even if no institute chosen to be inserted into 980
    else:
        jsondict = read_json(curdir,"hgf_9201_")
    inst_list = []
    list_980 = read_json(curdir,"hgf_980__")
    # collect institute ids (subfield "0") not yet present in 980
    for inst in jsondict:
        if {"a":inst["0"]} in list_980: continue
        inst_list.append({"a":inst["0"]})
    if inst_list == []: return  # nothing new to add -> leave 980 untouched
    list_980 += inst_list
    #check if users institut in 980, if not take it from user_info
    if str(uid) == "1": pass # no adding of institutes into 980 for admin
    else:
        # NOTE(review): the comparison below relies on the exact str() repr of
        # the 980 dicts matching strings in user_groups and the literal
        # "{'a': 'VDB'}" forms -- fragile, but kept as-is; confirm before changing.
        str_list_980 = [str(i) for i in list_980] #convert list with dicts into list with str(dicts), because python sets use list with strings
        intersection_groups = set(str_list_980) & set(user_groups) # user institute not in 980 yet
        intersection_vdb = set(["{'a': 'VDB'}", "{'a': 'VDBRELEVANT'}","{'a': 'VDBINPRINT'}"]) & set(str_list_980) # not vdb_relevant
        if intersection_groups == set([]) and intersection_vdb == set([]): # # prevent submitting vdb irrelevant stuff for another institute
            list_980 += extract_user_institutes("a",user_groups)
    write_json(curdir,"hgf_980__",list_980)
def insert_date(curdir,fielddate,sdate,edate):
    """Join separate start/end date files into one 'start - end' field.

    fielddate can be hgf_245__f, hgf_1112_d
    sdate: hgf_245__fs or hgf_1112_dcs
    edate: hgf_245__fe or hgf_1112_dce
    """
    start = read_file(curdir, sdate) if check_field_exists(curdir, sdate) else ""
    end = read_file(curdir, edate) if check_field_exists(curdir, edate) else ""
    if start == "" and end == "":
        return ""
    # combined field keeps the fixed ' - ' separator expected by handle_date
    write_file(curdir, fielddate, start + " - " + end)
    remove_file(curdir, sdate)
    remove_file(curdir, edate)
def prefill_vdb_relevant(curdir):
    """Prefill the hgf_vdb yes/no flag (and the massmedia flag) from 980."""
    if not check_field_exists(curdir, "hgf_980__"):
        return
    text = read_file(curdir, "hgf_980__")
    # any of these collection markers makes the record VDB relevant
    markers = ('VDBRELEVANT', '"VDB"', 'VDBINPRINT')
    value = "yes" if any(marker in text for marker in markers) else "no"
    write_file(curdir, "hgf_vdb", value)
    if 'MASSMEDIA' in text:  # prefill Massmedia
        write_file(curdir, "hgf_massmedia", "yes")
def add_field(self,fieldname):
    """Add a plain (non-json) field file to the record dictionary.

    Builds a structured field in self.data when several subfields exist
    as individual files: the file name (minus the 'hgf_' prefix) encodes
    the 5-character marc field followed by the subfield code.
    """
    content = read_file(self.curdir, fieldname)
    stripped = fieldname.replace("hgf_", "")
    marc_tag, subfield_code = stripped[:5], stripped[5]
    self.add_one_field(marc_tag, subfield_code, content)
def add_FFT(curdir):
    """ !!!move_files_to_storage, move files to done have to be deleted from websubmit function!!!
    add FFT tag into record: builds the hgf_FFT__ json structure (path, report
    number, access restriction) for the submitted fulltext file.
    if this function is used: the functions stamp_uploaded_files should not be
    used in the websubmit anymore.
    Empty uploads abort the file part of the submission and notify the
    submitter (or the admin) by email; the record itself is still processed.
    """
    if not check_field_exists(curdir,"hgf_file"): return None # no file submitted
    fulltext_filename = read_file(curdir,"hgf_file")
    fulltext_path = os.path.join(curdir,"files","hgf_file",fulltext_filename)
    if not os.path.exists(fulltext_path): return None # no file named in hgf_file in files directory. something is wrong..
    if os.path.getsize(fulltext_path) == 0: #check file size
        #send email
        #first get the url record link
        if not check_field_exists(curdir,"SN"): return None # no recid-->something is wrong..
        recid = get_recordid(curdir)
        rec_url = CFG_SITE_URL + "/record/" + recid
        #create email
        email_txt = 'Dear Sir or Madam, \n\nAn empty file has been submitted for the record: %s\n\nProbably it was caused, because the file has been deleted from its directory before final submission into %s !!!\nIt is possible, that the record itself is not available, when this email was sent, but it should be processed within minutes. Once this is finished you may add the fulltext by accessing %s and using "modify record" link \n\n' %(rec_url,CFG_SITE_NAME,rec_url)
        email_subject = 'File submission incomplete!!!'
        #email check
        if check_field_exists(curdir,"SuE"):
            email_to = read_file(curdir,"SuE") # get email from submitter
        else:
            email_to = CFG_SITE_ADMIN_EMAIL # in case somehow no email of submitter exists, send email to admin
        send_email(CFG_SITE_ADMIN_EMAIL, email_to, email_subject, email_txt,copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN,header="",html_header="")
        return None #cancel file submission (the submitter has already been informed via email), the original submission will be processed.
    inst_dict_list = read_json(curdir,"hgf_9201_") #read in institutes
    inst_list = []  # NOTE(review): never used below -- candidate for removal
    restriction = "firerole: allow groups 'STAFF'" # staff is always
    # add the institutes id and append the external auth info as this
    # builds the actual group name we need to allow here.
    for inst in inst_dict_list:
        restriction += ",'" + inst["0"] + ' ['+CFG_EXTERNAL_AUTH_DEFAULT+']' + "'" # TODO: multiple authentifications
    filename = read_file(curdir,"hgf_file")
    file_path = os.path.join(curdir,"files","hgf_file",filename)
    if not check_field_exists(curdir,"rn"): return  # report number required for FFT $n
    rn = read_file(curdir,"rn")
    #fill subfields for FFT: $a file path, $n new name, $r restriction
    fft_dict = {}
    fft_dict["a"] = file_path
    fft_dict["n"] = rn
    fft_dict["r"] = restriction
    write_json(curdir,"hgf_FFT__",fft_dict)
def prefill_0247(curdir):
    """Prefill the URN and Patent input fields from stored 0247_ data.

    Reads either the structured json field (hgf_0247_) or the single
    subfield pair (hgf_0247_2 / hgf_0247_a) and writes the type-specific
    input files hgf_0247_a2pat / hgf_0247_a2urn.
    """
    if check_field_exists(curdir, "hgf_0247_"):  # json structure
        for jsondict in read_json(curdir, "hgf_0247_"):
            if "2" not in jsondict:  # has_key is gone in Python 3
                continue
            if jsondict["2"] == "Patent":
                write_file(curdir, "hgf_0247_a2pat", jsondict["a"])
            elif jsondict["2"] == "URN":
                write_file(curdir, "hgf_0247_a2urn", jsondict["a"])
    if check_field_exists(curdir, "hgf_0247_2") and check_field_exists(curdir, "hgf_0247_a"):
        subfield_2 = read_file(curdir, "hgf_0247_2")
        subfield_a = read_file(curdir, "hgf_0247_a")
        if subfield_2 == "Patent":
            write_file(curdir, "hgf_0247_a2pat", subfield_a)
        elif subfield_2 == "URN":
            write_file(curdir, "hgf_0247_a2urn", subfield_a)
def handle_date(curdir,fielddate,sdate,edate):
    """Split a combined 'start - end' date field into separate files.

    Inverse of insert_date (used when prefilling the modify form).
    fielddate can be hgf_245__f, hgf_1112_d
    sdate: hgf_245__fs or hgf_1112_dcs
    edate: hgf_245__fe or hgf_1112_dce
    """
    if not check_field_exists(curdir, fielddate):
        return
    date = read_file(curdir, fielddate)
    try:
        dat1, dat2 = date.split(" - ")
    except ValueError:
        # not exactly one ' - ' separator -> not a combined date, leave as is
        return
    if dat1 != "":
        write_file(curdir, sdate, dat1)
    if dat2 != "":
        write_file(curdir, edate, dat2)
def write_mod_doctype(curdir):
    """write mod_doctype file to automatically connect to modification page

    Lists every field of the doctype's SBI submission page (except the
    technical hgf_start/hgf_end/hgf_master markers), one per line.
    """
    #TODO: do we need this function ???
    doctype = read_file(curdir, 'doctype')  # in initial dir, so avoid a global
    mod_doctype_path = os.path.join(curdir, "mod_" + doctype)
    tuple_fields = get_details_and_description_of_all_fields_on_submissionpage(doctype, "SBI", 1)
    # 'with' guarantees the file is closed even if a write raises
    with open(mod_doctype_path, "w") as mod_file:
        for _tuple in tuple_fields:
            field = _tuple[0]
            if field in ("hgf_start", "hgf_end", "hgf_master"):
                continue  # technical markers, not real input fields
            mod_file.write(field + "\n")
def handle_245(curdir):
    """Assemble the structured 245__ field from its parts.

    245__a (title) and 245__f (date) arrive as unstructured text input
    fields, while 245__h (publication form) may come as a structured json
    token-input field (AC's request). Merge everything into one json
    structure in hgf_245__ so the following workflow can transparently
    handle it as if it was passed as a structure in the first place.
    """
    # Get unstructured stuff
    title, date = "", ""
    if check_field_exists(curdir, "hgf_245__a"):
        title = read_file(curdir, "hgf_245__a")
    if check_field_exists(curdir, "hgf_245__f"):
        date = read_file(curdir, "hgf_245__f")
    jsondict = {}
    # Try to get what we have already in the structure as such
    if check_field_exists(curdir, "hgf_245__"):
        structured = read_json(curdir, "hgf_245__")
        # in case of multiple publication forms (???, should be non
        # repeatable, but just in case: create comma separated string)
        pubforms = [entry["h"] for entry in structured if 'h' in entry]
        pubstring = ", ".join(pubforms)
        if pubstring != "":
            jsondict = {"h": pubstring}
    # Add unstructured fields, if they exist
    if title != "":
        jsondict["a"] = title
    if date != "":
        jsondict["f"] = date
    # Write the full structured file
    write_json(curdir, "hgf_245__", jsondict)
def insert_3367(curdir):
    """Get doctype from authorities, create 3367 and set our doctypes in 980."""
    doctype = read_file(curdir, "doctype")
    access = read_file(curdir, "access")  # submission id
    subtype = ''
    try:
        # Check if we have a refinement of the doctype. Usually we have
        # this only for talks which could be "Invited" or whatever. If so,
        # add it to 3367_$x
        subtype = read_file(curdir, "hgf_3367_x")
    except:
        # Usually, we do not have refinements.
        pass
    doctype_dict = get_pubtype_info(doctype)
    if doctype_dict == {}:
        doctype_dict_list = [{"m": doctype}]  # no authority record
    else:
        # Run over the dictionary and build up a list of all document types.
        # Note that not all document types have to be hgf-types, they may as
        # well stem from other vocabularies (DINI/DRIVER...)
        doctype_dict_list = handle_list_of_doctype_dict(doctype_dict, access, doctype, subtype)
    doctype_dict_list = add_reportdoctype(curdir, doctype, doctype_dict_list)   # add intrep doctype
    doctype_dict_list = add_journaldoctype(curdir, doctype, doctype_dict_list)  # add journal doctype
    doctype_dict_list = add_bookdoctype(curdir, doctype, doctype_dict_list)     # add book doctype
    doctype_dict_list = add_procdoctype(curdir, doctype, doctype_dict_list)     # add proc doctype
    if check_field_exists(curdir, "hgf_980__"):
        list_980 = read_json(curdir, "hgf_980__")
    else:
        list_980 = []
    # Only add our own doctypes to 980 (ie collections and not DINI/DRIVER);
    # those are the entries that carry an "m" subfield.
    for dtype in doctype_dict_list:
        if "m" not in dtype:
            continue
        if {"a": dtype["m"]} in list_980:
            continue
        list_980.append({"a": dtype["m"]})
    write_json(curdir, "hgf_980__", list_980)
    write_json(curdir, "hgf_3367_", doctype_dict_list)
def handle_url(curdir):
    """Extract one external URL from the 8564_ structure into 8564_u.

    Only one URL can be submitted/modified (bibedit urls die ;) and
    internal file links (containing CFG_SITE_URL) are skipped/removed.
    """
    if check_field_exists(curdir, "hgf_8564_"):
        remove_file(curdir, "hgf_8564_u")
        jsondict_list = read_json(curdir, "hgf_8564_")
        for entry in jsondict_list:
            if "u" not in entry:  # was has_key -- gone in Python 3
                continue  # no subfield u detected
            if CFG_SITE_URL in entry["u"]:
                continue  # skip internal file
            write_file(curdir, "hgf_8564_u", entry["u"])
            remove_file(curdir, "hgf_8564_")
            return  # write only one URL
    if check_field_exists(curdir, "hgf_8564_u"):
        text = read_file(curdir, "hgf_8564_u")
        if CFG_SITE_URL in text:
            remove_file(curdir, "hgf_8564_u")  # skip internal file
def handle_980(curdir):
    """Normalize the 980 collection list.

    Moves the original doctype to the front (needed by invenio, more
    likely a bug), removes duplicate entries, appends a DELETED marker
    when requested and applies collection restrictions.
    """
    list_980 = read_json(curdir, "hgf_980__")
    doctype = read_file(curdir, "doctype")
    # move original doctype to be first entry in 980 list
    list_980.insert(0, list_980.pop(list_980.index({"a": doctype})))
    new_list = []
    for entry in list_980:
        if entry not in new_list:  # remove double entries
            new_list.append(entry)
    if check_field_exists(curdir, "hgf_delete"):
        new_list.append({"c": "DELETED"})  # user wants to delete this record
    # set UNRESTRICTED if 980 collection appears in CFG_PUBLIC_COLLECTIONS
    new_list = set_restriction(new_list)
    write_json(curdir, "hgf_980__", new_list)
def handle_1001(curdir):
    """Add gender to 1001_.

    Technical field: 1001_ contains a list with a single dict holding the
    first author's name. Gender (used only for phd-like entries) is added
    to that dict. NOTE: for further processing the newly written technical
    field must not contain a real JSON structure, but again only this list
    of a single hash.
    """
    if not check_field_exists(curdir, "hgf_1001_g"):
        return  # no gender set
    if not check_field_exists(curdir, "hgf_1001_"):
        return
    author_list = read_json(curdir, "hgf_1001_")
    # 100 contains only one person -> attach gender to the first entry
    author_list[0]["g"] = read_file(curdir, "hgf_1001_g")
    write_json(curdir, "hgf_1001_", author_list)
def Prefill_hgf_fields(parameters, curdir, form, user_info=None): """extract all information from DB-record as json dict and write files into curdir""" # record_dict = get_record(sysno) #get record record_dict = get_record(read_file(curdir, 'SN')) #get record json_dict = wash_db_record_dict(record_dict) #create nice json dictionary json_dict = add_non_json_fields(json_dict) #add single input fields write_all_files(curdir,json_dict) # write all values to files write_done_file(curdir) #write done file--> cheat invenio prefill_245(curdir) prefill_0247(curdir) prefill_gender(curdir) handle_url(curdir) handle_date(curdir,"hgf_245__f","hgf_245__fs","hgf_245__fe") handle_date(curdir,"hgf_1112_d","hgf_1112_dcs","hgf_1112_dce") write_mod_doctype(curdir) prefill_vdb_relevant(curdir) handle_institutes_for_modify(curdir)
def check_vdb_relevant(curdir,fieldname):
    """Return True if the record is flagged VDB relevant, else None.

    NOTE(review): `fieldname` is only used for the existence check while
    the value is always read from 'hgf_vdb' -- presumably callers pass
    "hgf_vdb" here; confirm against callers before changing.
    """
    if not check_field_exists(curdir, fieldname):
        return None
    text = read_file(curdir, "hgf_vdb")
    if "yes" in text:
        return True
    return None
def insert_reportnr(curdir):
    """Copy the report number (rn file) into marc field 037__a."""
    write_file(curdir, "hgf_037__a", read_file(curdir, "rn"))
def insert_email(curdir):
    """Read SuE (email of submitter) file and store it in 8560_f."""
    if check_field_exists(curdir, "SuE"):
        write_file(curdir, "hgf_8560_f", read_file(curdir, "SuE"))
def check_9201(curdir):
    """Delete all hgf_9201_* institute files when the record is flagged
    as not VDB relevant (hgf_vdb == "no")."""
    if not check_field_exists(curdir, "hgf_vdb"):
        return
    if read_file(curdir, "hgf_vdb") != "no":
        return
    # Remove the files directly instead of shelling out to `rm -f` via
    # os.system, which breaks on paths containing spaces or shell
    # metacharacters and spawns a needless subshell.
    import glob
    for path in glob.glob(os.path.join(curdir, "hgf_9201_*")):
        try:
            os.remove(path)
        except OSError:
            pass  # keep the best-effort semantics of 'rm -f'