def createPasscode2ResourceIdsAndResourceIds2SubmisionIds(self, verbose):
    name="SerializeResource.createPasscode2ResourceIdsAndResourceIds2SubmisionIds"
    p2ri=collections.defaultdict(list)      #passcode 2 rid
    p2s=collections.defaultdict(list)       #passcode 2 subid
    rid2ris=collections.defaultdict(list)   #rid 2 resource instances
    templistrid=[]
    templistsubid=[]
    if (verbose==1):
        print "\tExecuting "+name
    for p, ris in self.__p2ris.iteritems():
        for ri in ris:
            status=pruneName(ri.get_prodstatus(),"")
            status=modifyString(status,"/","_")
            pname=pruneName(ri.get_name(),"")
            pname=modifyString(pname,"/","-")
            pname=pname.replace("&", "&amp;")
            subid=ri.get_conf()+ri.get_year()+self.__RIDSEP__+ri.get_passcode()
            rid=subid+self.__RIDSEP__+ri.get_type()+self.__RIDSEP__+pname+self.__RIDSEP__+status
            #print rid#.get_type()
            if not (rid in templistrid):
                templistrid.append(rid)
                p2ri[p].append(rid)
            #every instance is recorded, so instances sharing a rid are grouped together
            rid2ris[rid].append(ri)
            if not (subid in templistsubid):
                templistsubid.append(subid)
                p2s[p].append(subid)
    self.set_p_2ris(p2ri)
    self.set_p_2subs(p2s)
    self.set_rid_2ris(rid2ris)
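
# Illustrative sketch (not part of the original class) of the two composite
# identifiers built above. The "#" separator is an assumption: SerializeSubmission
# uses a literal "#" for the same purpose, but the actual value of
# self.__RIDSEP__ may differ.
def _example_build_ids(conf="LREC", year="2014", passcode="ab12cd",
                       rtype="Corpus", pname="My-Corpus", status="Existing-used",
                       sep="#"):
    # subid identifies the submission; rid additionally pins down the resource
    subid = conf + year + sep + passcode            # 'LREC2014#ab12cd'
    rid = sep.join([subid, rtype, pname, status])   # 'LREC2014#ab12cd#Corpus#My-Corpus#Existing-used'
    return subid, rid
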
def createPasscode2AuthIdsAndAffiliationSetAndAuthIds2Affiliations(self,verbose):
    name="SerializeAuthor.createPasscode2AuthIdsAndAffiliationSetAndAuthIds2Affiliations"
    p2s=collections.defaultdict(list)           #passcode 2 author sha ids
    s2a=collections.defaultdict(list)           #author sha id 2 affiliation ids
    affiliations=collections.defaultdict(list)  #affiliation id 2 affiliation details
    templistida=[]
    templistidaffi=[]
    if (verbose==1):
        print "\tExecuting "+name
    for p, auths in self.__p2auths.iteritems():
        for auth in auths:
            #print "\nPasscode "+p+" hasAuthEmail "+auth.get_email()+"\n"
            idaffi="NOAFFI"
            #the sha of the author email is used as the author id
            ida=shasum(auth.get_email())
            affiliation=auth.get_affiliation()
            name=affiliation.get_name()
            country=affiliation.get_country_code()
            if name is not None and country is not None:
                idaffi=pruneName(name,country)
            #else:
            #    idaffi="NOAFFI"
            #    name="AFFINAME"
            #    country="None"
            idaffi=shasum(idaffi)
            #print "XXX "+idaffi
            if not (ida in templistida):
                p2s[p].append(ida)
                templistida.append(ida)
            else:
                if (verbose==1):
                    print "\t\tWARNING Author Element "+ida+" with email "+auth.get_email()+" appears many times"
            #affiliations
            s2a[ida].append(idaffi)
            if not (idaffi in templistidaffi):
                templistidaffi.append(idaffi)
                affiliations[idaffi].append(affiliation)
            else:
                if (verbose==1):
                    print "\t\tWARNING Affiliation Element "+ida+" with name "+name+" and country "+country+" appears many times"
    self.set_pass_2_sha(p2s)
    self.set_affiliation_id_2_details(affiliations)
    self.set_sha_2_affiliation(s2a)
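
# Minimal sketch of the shasum() helper used throughout this module but defined
# elsewhere in the project. Judging by the 40-character hexadecimal ids that
# appear in the docstrings below (e.g. 2d0249738f36125405e9333b23035856b20db21c),
# it presumably wraps hashlib.sha1; the real helper may normalise its input
# differently.
import hashlib

def _example_shasum(text):
    # SHA-1 hex digest of the UTF-8 encoded input: 40 hex characters
    return hashlib.sha1(text.encode("utf-8")).hexdigest()
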
def createPasscode2SubsIdsAndSubsId2PapersAndSubsId2Confs(self, verbose):
    name="SerializeSubmission.createPasscode2SubsIdsAndSubsId2PapersAndSubsId2Confs"
    p2subs=collections.defaultdict(list)   #passcode 2 submission ids
    p2ps=collections.defaultdict(list)     #passcode 2 paper ids
    p2cs=collections.defaultdict(list)     #passcode 2 conference ids
    p2pobj=collections.defaultdict(list)   #passcode 2 paper objects
    templistpid=[]
    templistconfid=[]
    templistsubid=[]
    if verbose == 1:
        print "\tExecuting "+name
    for p, subs in self.__p2subs.iteritems():
        papers=self.__p2papers.get(p)
        confs=self.__p2confs.get(p)
        for s in subs:
            subid=s.get_conf()+s.get_year()+"#"+s.get_passcode()
            if not (subid in templistsubid):
                templistsubid.append(subid)
                p2subs[p].append(subid)
        if papers is not None:
            for paper in papers:
                pid=str(paper.get_conf())+str(paper.get_year())+str(paper.get_pid())
                pid=shasum(pid)
                if not (pid in templistpid):
                    templistpid.append(pid)
                    p2ps[p].append(pid)
                    p2pobj[p].append(paper)
        if confs is not None:
            for conf in confs:
                idc=conf.get_conf()
                idy=conf.get_year()
                idx=idc+idy
                idx=utils.pruneName(idx, " ")
                if not (idx in templistconfid):
                    templistconfid.append(idx)
                    p2cs[p].append(idc+idy)
    self.set_pass_2subs(p2subs)
    self.set_pass_2papers(p2ps)
    self.set_pass_2confs(p2cs)
    self.set_pass_2paper_obj(p2pobj)
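
# Illustrative sketch, with made-up values, of the grouping pattern the method
# above applies to submissions, papers and conferences alike: a
# defaultdict(list) keyed by passcode, with a plain list acting as a global
# "already seen" filter so each derived id is appended only once.
import collections

def _example_group_once(pairs):
    grouped = collections.defaultdict(list)
    seen = []
    for passcode, derived_id in pairs:
        if derived_id not in seen:
            seen.append(derived_id)
            grouped[passcode].append(derived_id)
    return grouped

# _example_group_once([("ab12cd", "LREC2014#ab12cd"), ("ab12cd", "LREC2014#ab12cd")])
# -> {'ab12cd': ['LREC2014#ab12cd']} (wrapped in a defaultdict)
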
def serializeResourcesIntoManyFile(self,verbose):
    name="SerializeResource.serializeResourcesIntoManyFile"
    rtype_subclassed=set()
    newrtype=collections.defaultdict(set)
    names_serialized=set()
    avails_serialized=set()
    status_serialized=set()
    mods_serialized=set()
    l1_serialized=set()
    uses_serialized=set()
    res_serialized=set()
    append_res=""
    ns="&lremap;"
    #print self.get_rid_2ris()
    if (verbose==1):
        print "\t\tExecuting "+name
    #idx file
    idxfile=self.outfile+self.__INDEXOFRES__
    copy2(self.headerfile, idxfile)
    iFile=codecs.open(idxfile,'a','utf-8')
    iFile.write((self.__STARTRES__+"\n").encode('utf-8'))
    for p, rids in self.get_p_2ris().iteritems():
        for rid in rids:
            #reset the per-rid bookkeeping
            rtype_subclassed=set()
            newrtype=collections.defaultdict(set)
            names_serialized=set()
            avails_serialized=set()
            status_serialized=set()
            mods_serialized=set()
            l1_serialized=set()
            uses_serialized=set()
            res_serialized=set()
            ''' prepare the structure for new classes '''
            ris=self.get_rid_2ris().get(rid)
            if ris is not None:
                for ri in ris:
                    #print rid+" "+str(ri)
                    ''' create the description file '''
                    rid1=shasum(rid)
                    first_order_folder = str(rid1[:1])
                    second_order_folder = str(rid1[0:3])
                    if not os.path.exists(self.outfile+"/"+self.__RES__+first_order_folder):
                        os.makedirs(self.outfile+"/"+self.__RES__+"/"+first_order_folder)
                    if not os.path.exists(self.outfile+"/"+self.__RES__+first_order_folder+"/"+second_order_folder):
                        os.makedirs(self.outfile+"/"+self.__RES__+first_order_folder+"/"+second_order_folder)
                    append_res=self.__RES__+first_order_folder+"/"+second_order_folder
                    #write description file
                    iLine=self.__t1+"<rdf:Description rdf:about=\"&ri;"+"/"+append_res+"/"+str(rid1)+"\">"
                    iFile.write((iLine+"\n").encode('utf-8'))
                    label="Conference: "+ri.get_conf()+" and "
                    label=label+"Year: "+ri.get_year()+" and "
                    label=label+"Passcode: "+ri.get_passcode()+" and "
                    label=label+"Type: "+ri.get_type()+" and "
                    label=label+"Name: "+ri.get_name().decode(encoding='UTF-8',errors='strict')+" "
                    iLine=self.__t2+"<rdfs:label>"+label+"</rdfs:label>"
                    #iFile.write((iLine+"\n").encode('utf-8'))
                    iFile.write((iLine+"\n"))
                    iLine=self.__t1+"</rdf:Description>"
                    #iFile.write((iLine+"\n").encode('utf-8'))
                    iFile.write((iLine+"\n"))
                    # as usual
                    rtype=ri.get_type()
                    #print rid+" "+str(rtype)
                    rtype=pruneName(rtype, "")
                    #rtype="Language_Resources/Technologies_Infrastructure"
                    rtype=modifyString(rtype,"/","-")
                    rtype=rtype.replace("&", "&amp;")
                    if not (rtype in self.__RTYPES__):
                        ns="&ri;#"
                        if not rtype in rtype_subclassed:
                            #print "SUB CLASS "+rtype
                            rtype_subclassed.add(rtype)
                            newrtype[rtype].add(1)
                        else:
                            nrc=newrtype.get(rtype)
                            if nrc is not None:
                                for num in nrc:
                                    #print num
                                    del newrtype[rtype]
                                    newrtype[rtype].add(num+1)
                            else:
                                if (self.verbose==1):
                                    print "\t\t Warning resource type "+rtype+" NOT YET ENCOUNTERED"
                    else:
                        ns="&lremap;"
                    '''
                    create the resourcename
                    <owl:NamedIndividual rdf:about="&ri;myname">
                        <rdf:type rdf:resource="&lremap;ResourceName"/>
                    </owl:NamedIndividual>
                    '''
                    dst=self.outfile+"/"+append_res+"/"+rid1
                    copy2(self.headerfile, dst)
                    oFile=codecs.open(dst,'a','utf-8')
                    line=self.__t1+"<owl:NamedIndividual rdf:about=\"&ri;"+append_res+"/"+str(rid1)+"\">"
                    oFile.write((line+"\n").encode('utf-8'))
                    line=self.__t2+"<rdf:type rdf:resource=\""+ns+rtype+"\"/>"
                    oFile.write((line+"\n").encode('utf-8'))
                    rname=ri.get_name()
                    #print rid+" "+str(rtype)
                    rname=pruneName(rname, "")
                    rname=modifyString(rname,"^","-")
                    #rname="-"
                    rname=rname.replace("&", "&amp;")
                    if not (rname in names_serialized):
                        names_serialized.add(rname)
                    #avail="Freely_Available_aaa"
                    avail=ri.get_avail()
                    if avail is not None:
                        avail=pruneName(avail, "")
                        avail=modifyString(avail,"^","-")
                        avail=avail.replace("&", "&amp;")
                        if not (avail in self.__RAVAILS__):
                            avails_serialized.add(avail)
                            line=self.__t2+"<lremap:hasResourceAvailability rdf:resource=\"&ri;"+append_res+"/"+avail+"\"/>"
                        else:
                            line=self.__t2+"<lremap:hasResourceAvailability rdf:resource=\"&lremap;"+avail+"\"/>"
                        oFile.write((line+"\n").encode('utf-8'))
                    status=ri.get_prodstatus()
                    if (status is not None) and (status != "NoStatus"):
                        status=pruneName(status, "")
                        status=modifyString(status,"^","-")
                        status=status.replace("&", "&amp;")
                        if not (status in self.__RSTATUS__):
                            status_serialized.add(status)
                            line=self.__t2+"<lremap:hasResourceStatus rdf:resource=\"&ri;"+append_res+"/"+status+"\"/>"
                        else:
                            line=self.__t2+"<lremap:hasResourceStatus rdf:resource=\"&lremap;"+status+"\"/>"
                        oFile.write((line+"\n").encode('utf-8'))
                    mod=ri.get_modality()
                    if mod is not None:
                        mod=pruneName(mod, "")
                        mod=modifyString(mod,"^","-")
                        mod=mod.replace("&", "&amp;")
                        if not (mod in self.__RMODS__):
                            mods_serialized.add(mod)
                            line=self.__t2+"<lremap:hasResourceModality rdf:resource=\"&ri;"+append_res+"/"+mod+"\"/>"
                        else:
                            line=self.__t2+"<lremap:hasResourceModality rdf:resource=\"&lremap;"+mod+"\"/>"
                        oFile.write((line+"\n").encode('utf-8'))
                    use=ri.get_resourceusage()
                    if use is not None:
                        use=pruneName(use, "")
                        use=modifyString(use,"^","-")
                        use=use.replace("&", "&amp;")
                        if not (use in self.__RUSES__):
                            uses_serialized.add(use)
                            line=self.__t2+"<lremap:hasResourceUse rdf:resource=\"&ri;"+append_res+"/"+use+"\"/>"
                        else:
                            line=self.__t2+"<lremap:hasResourceUse rdf:resource=\"&lremap;"+use+"\"/>"
                        oFile.write((line+"\n").encode('utf-8'))
                    '''
                    create the individual with all properties
                    <owl:NamedIndividual rdf:about="&ri;MyCorpus">
                        <rdf:type rdf:resource="&lremap;Corpus"/>
                        <lvont:language rdf:resource="&lexvo;id/iso639-3/ita"/>
                        <lremap:hasResourceLanguageType rdf:resource="&lremap;Bi"/>
                        <lremap:hasResourceAvailability rdf:resource="&lremap;Freely_Available"/>
                        <lremap:hasResourceName rdf:resource="&ri;myname"/>
                    </owl:NamedIndividual>
                    '''
                    #line=self.__t2+"<lremap:hasResourceName rdf:resource=\"&ri;"+append_res+"/"+rname+"\"/>"
                    line=self.__t2+"<lremap:hasResourceName rdf:resource=\"&lremap;"+rname.decode(encoding='UTF-8',errors='strict')+"\"/>"
                    #oFile.write((line+"\n").encode('utf-8'))
                    oFile.write(line+"\n")
                    # referencing submissions
                    pids=self.get_p_2subs().get(p)
                    if pids is not None:
                        for pid in pids:
                            line=self.__t2+"<dcterms:references rdf:resource=\"&sub;#"+str(pid)+"\"/>"
                            oFile.write((line+"\n").encode('utf-8'))
                    #closing individual
                    line=self.__t1+"</owl:NamedIndividual>"
                    oFile.write((line+"\n").encode('utf-8'))
                    oFile.write((self.__ENDRES__+"\n").encode('utf-8'))
                    # create subclass
                    oFile.write((self.__STARTRESCLS__).encode('utf-8'))
                    '''
                    create
                    <owl:Class rdf:about="&ri;MyRT">
                        <rdfs:subClassOf rdf:resource="&lremap;ResourceType"/>
                    </owl:Class>
                    '''
                    for rc in rtype_subclassed:
                        line=self.__t1+"<owl:Class rdf:about=\"&ri;"+append_res+"/"+rc+"\">"
                        oFile.write((line+"\n").encode('utf-8'))
                        line=self.__t2+"<rdfs:subClassOf rdf:resource=\"&lremap;ResourceType\"/>"
                        oFile.write((line+"\n").encode('utf-8'))
                        line=self.__t1+"</owl:Class>"
                        oFile.write((line+"\n").encode('utf-8'))
                    oFile.write((self.__ENDRESCLS__).encode('utf-8'))
                    #resourcenames inds
                    for rname in names_serialized:
                        oFile.write((self.__STARTRESNAMES__).encode('utf-8'))
                        #line=self.__t1+"<owl:NamedIndividual rdf:about=\"&ri;"+append_res+"/"+rname+"\">"
                        line=self.__t1+"<owl:NamedIndividual rdf:about=\"&lremap;"+append_res+"/"+rname.decode(encoding='UTF-8',errors='strict')+"\">"
                        #oFile.write((line+"\n").encode('utf-8'))
                        oFile.write(line+"\n")
                        if rname!="-":
                            line=self.__t2+"<rdf:type rdf:resource=\"&lremap;ResourceName\"/>"
                        else:
                            line=self.__t2+"<rdf:type rdf:resource=\"&lremap;NoName\"/>"
                        oFile.write((line+"\n").encode('utf-8'))
                        line=self.__t1+"</owl:NamedIndividual>"
                        oFile.write((line+"\n").encode('utf-8'))
                        oFile.write((self.__ENDRESNAMES__).encode('utf-8'))
                    for avail in avails_serialized:
                        oFile.write((self.__STARTRESAVAILS__).encode('utf-8'))
                        line=self.__t1+"<owl:NamedIndividual rdf:about=\"&ri;"+append_res+"/"+avail+"\">"
                        oFile.write((line+"\n").encode('utf-8'))
                        line=self.__t2+"<rdf:type rdf:resource=\"&lremap;ResourceAvailability\"/>"
                        oFile.write((line+"\n").encode('utf-8'))
                        line=self.__t1+"</owl:NamedIndividual>"
                        oFile.write((line+"\n").encode('utf-8'))
                        oFile.write((self.__ENDRESAVAILS__).encode('utf-8'))
                    for status in status_serialized:
                        oFile.write((self.__STARTRESSTATUS__).encode('utf-8'))
                        line=self.__t1+"<owl:NamedIndividual rdf:about=\"&ri;"+append_res+"/"+status+"\">"
                        oFile.write((line+"\n").encode('utf-8'))
                        line=self.__t2+"<rdf:type rdf:resource=\"&lremap;ResourceStatus\"/>"
                        oFile.write((line+"\n").encode('utf-8'))
                        line=self.__t1+"</owl:NamedIndividual>"
                        oFile.write((line+"\n\n").encode('utf-8'))
                        oFile.write((self.__ENDRESSTATUS__).encode('utf-8'))
                    for mod in mods_serialized:
                        oFile.write((self.__STARTRESMODS__).encode('utf-8'))
                        line=self.__t1+"<owl:NamedIndividual rdf:about=\"&ri;"+append_res+"/"+mod+"\">"
                        oFile.write((line+"\n").encode('utf-8'))
                        line=self.__t2+"<rdf:type rdf:resource=\"&lremap;ResourceModality\"/>"
                        oFile.write((line+"\n").encode('utf-8'))
                        line=self.__t1+"</owl:NamedIndividual>"
                        oFile.write((line+"\n\n").encode('utf-8'))
                        oFile.write((self.__ENDRESMODS__).encode('utf-8'))
                    for use in uses_serialized:
                        oFile.write((self.__STARTRESUSES__).encode('utf-8'))
                        line=self.__t1+"<owl:NamedIndividual rdf:about=\"&ri;"+append_res+"/"+use+"\">"
                        oFile.write((line+"\n").encode('utf-8'))
                        line=self.__t2+"<rdf:type rdf:resource=\"&lremap;ResourceUse\"/>"
                        oFile.write((line+"\n").encode('utf-8'))
                        line=self.__t1+"</owl:NamedIndividual>"
                        oFile.write((line+"\n\n").encode('utf-8'))
                        oFile.write((self.__ENDRESUSES__).encode('utf-8'))
                    oFile.write((self.__CLOSELINE__+"\n").encode('utf-8'))
    #
    iFile.write((self.__ENDRESCLS__).encode('utf-8'))
    iFile.write((self.__CLOSELINE__).encode('utf-8'))
    #other stuff
    self.set_newrtype_num_of_instances(newrtype)
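
# Illustrative sketch (names made up) of the on-disk layout produced by
# serializeResourcesIntoManyFile and by the paper/status serializers below:
# each individual goes into its own file named after its sha id, under a
# two-level prefix made of the first one and the first three characters of
# that id, so directories stay small.
import os

def _example_sha_path(outdir, section, sha_id):
    first_order_folder = sha_id[:1]        # e.g. '2'
    second_order_folder = sha_id[:3]       # e.g. '2d0'
    # the real code first creates these folders with os.makedirs()
    return os.path.join(outdir, section, first_order_folder,
                        second_order_folder, sha_id)

# _example_sha_path("out", "res", "2d0249738f36125405e9333b23035856b20db21c")
# -> 'out/res/2/2d0/2d0249738f36125405e9333b23035856b20db21c'
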
def serializePapersAndAuthorListInManyFiles(self,verbose):
    name="SerializePaper.serializePapersAndAuthorListInManyFiles"
    paper_serialized=set()
    #auth_serialized=set()
    paper_auth_serialized=set()
    #auth_affi_serialized=set()
    paper_status_serialized=set()
    append_auth="authors"
    append_paper="papers"
    append_status="status"
    if (verbose==1):
        print "\t\tExecuting "+name
    #idx file
    idxfile=self.outfile+self.__INDEXOFPAPERS__
    copy2(self.headerfile, idxfile)
    iFile=codecs.open(idxfile,'a','utf-8')
    iFile.write((self.__STARTPAPER__+"\n").encode('utf-8'))
    for p, papers in self.__p2papers.iteritems():
        for paper in papers:
            '''
            create <owl:NamedIndividual rdf:about="&paper;x/xyz/2d0249738f36125405e9333b23035856b20db21c">
            calculate the shasum1 of the email
            '''
            pid=str(paper.get_conf())+str(paper.get_year())+str(paper.get_pid())
            pid=shasum(pid)
            if pid in paper_serialized:
                pass
                print pid
            else:
                paper_serialized.add(pid)
                #output file according to shasum
                first_order_folder = str(pid[:1])
                second_order_folder = str(pid[0:3])
                if not os.path.exists(self.outfile+"/"+self.__PAPERS__+first_order_folder):
                    os.makedirs(self.outfile+"/"+self.__PAPERS__+"/"+first_order_folder)
                if not os.path.exists(self.outfile+"/"+self.__PAPERS__+first_order_folder+"/"+second_order_folder):
                    os.makedirs(self.outfile+"/"+self.__PAPERS__+first_order_folder+"/"+second_order_folder)
                append_paper=self.__PAPERS__+first_order_folder+"/"+second_order_folder
                #write description file
                title=paper.get_title()
                title=title.replace("&", "&amp;")
                iLine=self.__t1+"<rdf:Description rdf:about=\"&paper;"+append_paper+"/"+str(pid)+"\">"
                iFile.write((iLine+"\n").encode('utf-8'))
                iLine=self.__t2+"<dc:title>"+title+"</dc:title>"
                iFile.write((iLine+"\n").encode('utf-8'))
                iLine=self.__t1+"</rdf:Description>"
                iFile.write((iLine+"\n").encode('utf-8'))
                dst=self.outfile+"/"+append_paper+"/"+pid
                copy2(self.headerfile, dst)
                oFile=codecs.open(dst,'a','utf-8')
                #oFile.write((self.__STARTAUTH__+"\n").encode('utf-8'))
                line=self.__t1+"<owl:NamedIndividual rdf:about=\"&paper;"+append_paper+"/"+str(pid)+"\">"
                oFile.write((line+"\n").encode('utf-8'))
                '''
                Create <rdf:type rdf:resource="&bibo;Article"/>
                '''
                line=self.__t2+"<rdf:type rdf:resource=\"&bibo;Article\"/>"
                oFile.write((line+"\n").encode('utf-8'))
                ''' create title '''
                title=paper.get_title()
                title=title.replace("&", "&amp;")
                line=self.__t2+"<dc:title>"+title+"</dc:title>"
                oFile.write((line+"\n").encode('utf-8'))
                ''' create the status '''
                status=pruneName(paper.get_status(),"")
                if not status in paper_status_serialized:
                    #print "XXXX "+status
                    paper_status_serialized.add(status)
                    ser=shasum(status)
                    first_order_folder = str(ser[:1])
                    second_order_folder = str(ser[0:3])
                    append_status=self.__STATUS__+first_order_folder+"/"+second_order_folder
                    line=self.__t2+"<bibo:status rdf:resource=\"&paper;"+append_status+"/"+ser+"\"/>"
                    oFile.write((line+"\n").encode('utf-8'))
                else:
                    pass
                    #print "ZZZ "+status
                ''' create the authors '''
                auths=self.get_paper_2auths()
                #print len(auths)
                if len(auths)>0:
                    line=self.__t2+"<bibo:authorList rdf:parseType=\"Collection\">"
                    oFile.write((line+"\n").encode('utf-8'))
                    #line=self.__t3+"<rdf:Seq>"
                    #oFile.write((line+"\n").encode('utf-8'))
                    for a in auths.get(pid):
                        if not (a in paper_auth_serialized):
                            ida=a.split("#")[2]
                            first_order_folder = str(ida[:1])
                            second_order_folder = str(ida[0:3])
                            append_auth=self.__AUTHS__+first_order_folder+"/"+second_order_folder+"/"
                            line=self.__t3+"<rdf:Description rdf:about=\"&auth;"+append_auth+ida+"\">"
                            oFile.write((line+"\n").encode('utf-8'))
                            line=self.__t4+"<rdfs:label>"+a.split("#")[1]+" "+a.split("#")[0]+"</rdfs:label>"
                            oFile.write((line+"\n").encode('utf-8'))
                            line=self.__t3+"</rdf:Description>"
                            oFile.write((line+"\n").encode('utf-8'))
                        else:
                            if (verbose==1):
                                print "\t\tWARNING Author Element "+a+" appears many times"
                    #line=self.__t3+"</rdf:Seq>"
                    #oFile.write((line+"\n").encode('utf-8'))
                    line=self.__t2+"</bibo:authorList>"
                    oFile.write((line+"\n").encode('utf-8'))
                '''
                Close paper
                </owl:NamedIndividual>
                '''
                line=self.__t1+"</owl:NamedIndividual>"
                oFile.write((line+"\n").encode('utf-8'))
                oFile.write((self.__CLOSELINE__+"\n").encode('utf-8'))
        #end for author
    #end for passcode
    iFile.write((self.__ENDPAPER__+"\n").encode('utf-8'))
    iFile.write((self.__CLOSELINE__+"\n").encode('utf-8'))
    '''
    Each status in a distinct file
    '''
    #idx file
    idxfile=self.outfile+"/"+self.__INDEXOFSTATUS__
    copy2(self.headerfile, idxfile)
    iFile=codecs.open(idxfile,'a','utf-8')
    iFile.write((self.__STARTSTATUS__+"\n").encode('utf-8'))
    #write status
    for s in paper_status_serialized:
        '''
        <owl:NamedIndividual rdf:about="&paper;undecided">
            <rdf:type rdf:resource="&bibo2;DocumentStatus"/>
        </owl:NamedIndividual>
        '''
        ser=shasum(s)
        first_order_folder = str(ser[:1])
        second_order_folder = str(ser[0:3])
        if not os.path.exists(self.outfile+"/"+self.__STATUS__+first_order_folder):
            os.makedirs(self.outfile+"/"+self.__STATUS__+"/"+first_order_folder)
        if not os.path.exists(self.outfile+"/"+self.__STATUS__+first_order_folder+"/"+second_order_folder):
            os.makedirs(self.outfile+"/"+self.__STATUS__+first_order_folder+"/"+second_order_folder)
        append_status=self.__STATUS__+first_order_folder+"/"+second_order_folder
        dst=self.outfile+"/"+append_status+"/"+ser
        copy2(self.headerfile, dst)
        oFile=codecs.open(dst,'a','utf-8')
        #write description file
        iLine=self.__t1+"<rdf:Description rdf:about=\"&paper;"+append_status+"/"+str(ser)+"\"></rdf:Description>"
        iFile.write((iLine+"\n").encode('utf-8'))
        line=self.__t1+"<owl:NamedIndividual rdf:about=\"&paper;#"+ser+"\">"
        oFile.write((line+"\n").encode('utf-8'))
        line=self.__t2+"<rdf:type rdf:resource=\"&paper;#status\"/>"
        oFile.write((line+"\n").encode('utf-8'))
        line=self.__t2+"<rdfs:label>"+s+"</rdfs:label>"
        oFile.write((line+"\n").encode('utf-8'))
        line=self.__t1+"</owl:NamedIndividual>"
        oFile.write((line+"\n").encode('utf-8'))
        oFile.write((self.__CLOSELINE__+"\n").encode('utf-8'))
    iFile.write((self.__ENDSTATUS__+"\n").encode('utf-8'))
    iFile.write((self.__CLOSELINE__+"\n").encode('utf-8'))
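
# Illustrative sketch of the author keys consumed above: get_paper_2auths()
# apparently maps a paper id to strings of the form "<field0>#<field1>#<sha>",
# where index 2 is the author's sha id and "<field1> <field0>" becomes the
# rdfs:label. The field order (plausibly surname#firstname#sha) is an
# assumption; the value below is made up.
def _example_split_author_key(key="Rossi#Maria#2d0249738f36125405e9333b23035856b20db21c"):
    parts = key.split("#")
    ida = parts[2]                     # sha id, also used to build the folder prefix
    label = parts[1] + " " + parts[0]  # what goes into <rdfs:label>
    return ida, label
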
def serializePapersAndAuthorListIntoSigleFile(self,verbose):
    name="SerializePaper.serializePapersAndAuthorListIntoSigleFile"
    paper_serialized=set()
    paper_auth_serialized=set()
    paper_status_serialized=set()
    if (verbose==1):
        print "\t\tExecuting "+name
    oFile=codecs.open(self.outfile,'a','utf-8')
    oFile.write((self.__STARTPAPER__).encode('utf-8'))
    for p, papers in self.__p2papers.iteritems():
        for paper in papers:
            '''
            create <owl:NamedIndividual rdf:about="&paper;#2d0249738f36125405e9333b23035856b20db21c">
            calculate the shasum1 of the email
            '''
            pid=str(paper.get_conf())+str(paper.get_year())+str(paper.get_pid())
            pid=shasum(pid)
            if pid in paper_serialized:
                pass
                #print p+" "+pid+" "+paper.get_conf()
            else:
                paper_serialized.add(pid)
                #print p+" "+pid+" "+paper.get_conf()
                line=self.__t1+"<owl:NamedIndividual rdf:about=\"&paper;#"+str(pid)+"\">"
                oFile.write((line+"\n").encode('utf-8'))
                '''
                Create <rdf:type rdf:resource="&bibo;Article"/>
                '''
                line=self.__t2+"<rdf:type rdf:resource=\"&bibo;Article\"/>"
                oFile.write((line+"\n").encode('utf-8'))
                ''' create title '''
                title=paper.get_title()
                title=title.replace("&", "&amp;")
                line=self.__t2+"<dc:title>"+title+"</dc:title>"
                oFile.write((line+"\n").encode('utf-8'))
                ''' create the status '''
                status=pruneName(paper.get_status(),"")
                if not status in paper_status_serialized:
                    #print "XXXX "+status
                    paper_status_serialized.add(status)
                    status=shasum(status)
                    line=self.__t2+"<bibo:status rdf:resource=\"&paper;#"+status+"\"/>"
                    oFile.write((line+"\n").encode('utf-8'))
                else:
                    pass
                    #print "ZZZ "+status
                ''' create the authors '''
                auths=self.get_paper_2auths()
                #print auths
                if len(auths)>0:
                    line=self.__t2+"<bibo:authorList rdf:parseType=\"Collection\">"
                    oFile.write((line+"\n").encode('utf-8'))
                    line=self.__t3+"<rdf:Seq>"
                    #oFile.write((line+"\n").encode('utf-8'))
                    for a in auths.get(pid):
                        #print a
                        if not (a in paper_auth_serialized):
                            #<rdf:Description rdf:about
                            line=self.__t3+"<rdf:Description rdf:about=\"&auth;#"+a.split("#")[2]+"\">"
                            oFile.write((line+"\n").encode('utf-8'))
                            line=self.__t4+"<rdfs:label>"+a.split("#")[1]+" "+a.split("#")[0]+"</rdfs:label>"
                            oFile.write((line+"\n").encode('utf-8'))
                            line=self.__t3+"</rdf:Description>"
                            oFile.write((line+"\n").encode('utf-8'))
                        else:
                            if (verbose==1):
                                print "\t\tWARNING Author Element "+a+" appears many times"
                    #line=self.__t3+"</rdf:Seq>"
                    #oFile.write((line+"\n").encode('utf-8'))
                    line=self.__t2+"</bibo:authorList>"
                    oFile.write((line+"\n").encode('utf-8'))
                '''
                Close paper
                </owl:NamedIndividual>
                '''
                line=self.__t1+"</owl:NamedIndividual>"
                oFile.write((line+"\n").encode('utf-8'))
        #end for papers
    #end for passcode
    oFile.write((self.__ENDPAPER__+"\n").encode('utf-8'))
    oFile.write((self.__STARTSTATUS__+"\n").encode('utf-8'))
    #write status
    for s in paper_status_serialized:
        '''
        <owl:NamedIndividual rdf:about="&paper;undecided">
            <rdf:type rdf:resource="&bibo2;DocumentStatus"/>
        </owl:NamedIndividual>
        '''
        ser=shasum(s)
        line=self.__t1+"<owl:NamedIndividual rdf:about=\"&paper;#"+ser+"\">"
        oFile.write((line+"\n").encode('utf-8'))
        line=self.__t2+"<rdf:type rdf:resource=\"&paper;#status\"/>"
        oFile.write((line+"\n").encode('utf-8'))
        line=self.__t2+"<rdfs:label>"+s+"</rdfs:label>"
        oFile.write((line+"\n").encode('utf-8'))
        line=self.__t1+"</owl:NamedIndividual>"
        oFile.write((line+"\n").encode('utf-8'))
    oFile.write((self.__ENDSTATUS__+"\n").encode('utf-8'))
    oFile.write((self.__CLOSELINE__+"\n").encode('utf-8'))
def serializeConferenceIntoSigleFile(self, verbose):
    name = "SerializeConference.serializeConferenceIntoSigleFile"
    conf_serialized = set()
    location_serialized = set()
    if verbose == 1:
        print "\t\tExecuting " + name
    oFile = codecs.open(self.outfile, "a", "utf-8")
    oFile.write((self.__STARTCONF__).encode("utf-8"))
    """
    write
    <owl:Class rdf:about="&conf;Lrec2014">
        <rdfs:label xml:lang="en">Lrec2014</rdfs:label>
        <rdfs:subClassOf rdf:resource="&conf;Conference"/>
    </owl:Class>
    """
    for p, confs in self.__p2confs.iteritems():
        for conf in confs:
            idc = conf.get_conf()
            idy = conf.get_year()
            ty = conf.get_ty()
            se = conf.get_subevent()
            location = conf.get_location()
            event = "ConferenceEvent"
            subevent = ""
            idx = idc + "#" + idy
            # idx=idc
            idx = utils.pruneName(idx, " ")
            if not idx in conf_serialized:
                conf_serialized.add(idx)
                location_serialized.add(location)
                """
                <owl:NamedIndividual rdf:about="&swc;C1">
                    <rdf:type rdf:resource="&swc;ConferenceEvent"/>
                    <hasLocation rdf:resource="&swc;Reykjavik"/>
                </owl:NamedIndividual>
                """
                if ty == "WS":
                    event = "WorkshopEvent"
                    subevent = '<swc:isSubEventOf rdf:resource="&swc;#' + se + '"/>'
                else:
                    event = "ConferenceEvent"
                    subevent = ""
                line = self.__t1 + '<owl:NamedIndividual rdf:about="&swc;#' + idc + idy + '">'
                oFile.write((line + "\n").encode("utf-8"))
                line = self.__t2 + "<rdfs:label>" + idc + " " + idy + "</rdfs:label>"
                oFile.write((line + "\n").encode("utf-8"))
                line = self.__t2 + '<rdf:type rdf:resource="&swc;#' + event + '"/>'
                oFile.write((line + "\n").encode("utf-8"))
                line = self.__t2 + '<tl:atYear rdf:datatype="&xsd;#gYear">' + idy + "</tl:atYear>"
                oFile.write((line + "\n").encode("utf-8"))
                line = self.__t2 + '<swc:hasLocation rdf:resource="&swc;#' + location + '"/>'
                oFile.write((line + "\n").encode("utf-8"))
                if subevent != "":
                    line = self.__t2 + subevent
                    oFile.write((line + "\n").encode("utf-8"))
                line = self.__t1 + "</owl:NamedIndividual>"
                oFile.write((line + "\n").encode("utf-8"))
    oFile.write((self.__ENDCONF__ + "\n").encode("utf-8"))
    """
    <owl:NamedIndividual rdf:about="&year;2010">
        <rdf:type rdf:resource="&year;2010"/>
    </owl:NamedIndividual>
    """
    oFile.write((self.__STARTLOCLIST__).encode("utf-8"))
    for y in location_serialized:
        line = self.__t1 + '<owl:NamedIndividual rdf:about="&swc;#' + y + '">'
        oFile.write((line + "\n").encode("utf-8"))
        line = self.__t2 + '<rdf:type rdf:resource="&geo;#SpatialThing"/>'
        oFile.write((line + "\n").encode("utf-8"))
        line = self.__t1 + "</owl:NamedIndividual>"
        oFile.write((line + "\n").encode("utf-8"))
    oFile.write((self.__ENDLOCLIST__).encode("utf-8"))
    oFile.write((self.__CLOSELINE__ + "\n").encode("utf-8"))
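
# Illustrative sketch (values made up, reusing the LREC 2014 / Reykjavik
# examples from the docstrings above) of the RDF/XML the conference serializer
# emits for one main conference and its location individual; a workshop would
# instead get rdf:type WorkshopEvent plus a <swc:isSubEventOf .../> triple.
#
#   <owl:NamedIndividual rdf:about="&swc;#LREC2014">
#       <rdfs:label>LREC 2014</rdfs:label>
#       <rdf:type rdf:resource="&swc;#ConferenceEvent"/>
#       <tl:atYear rdf:datatype="&xsd;#gYear">2014</tl:atYear>
#       <swc:hasLocation rdf:resource="&swc;#Reykjavik"/>
#   </owl:NamedIndividual>
#
#   <owl:NamedIndividual rdf:about="&swc;#Reykjavik">
#       <rdf:type rdf:resource="&geo;#SpatialThing"/>
#   </owl:NamedIndividual>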