def createPasscode2ResourceIdsAndResourceIds2SubmisionIds(self, verbose):
    """Build the passcode/resource-id/submission-id lookup tables.

    Walks every resource object stored in ``self.__p2ris`` and derives

    * a submission id: ``conf + year + SEP + passcode``
    * a resource id:   ``subid + SEP + type + SEP + name + SEP + status``

    where ``SEP`` is ``self.__RIDSEP__`` and name/status are first passed
    through ``pruneName`` and ``modifyString``.

    Each id is recorded only the first time it is met, across *all*
    passcodes: it is attached to the passcode under which it first appears
    and a resource id keeps only the first resource object that produced it.

    The results are handed to ``set_p_2ris`` (passcode -> resource ids),
    ``set_p_2subs`` (passcode -> submission ids) and ``set_rid_2ris``
    (resource id -> resource objects).

    verbose: when equal to 1 a progress line is printed.
    """
    name = "SerializeResource.createPasscode2ResourceIdsAndResourceIds2SubmisionIds"
    p2ri = collections.defaultdict(list)     # passcode -> resource ids
    p2s = collections.defaultdict(list)      # passcode -> submission ids
    rid2ris = collections.defaultdict(list)  # resource id -> resource objects
    # Sets instead of lists: membership is tested once per resource, and a
    # list lookup there makes the whole pass quadratic.
    seen_rids = set()
    seen_subids = set()
    sep = self.__RIDSEP__

    if verbose == 1:
        print("\tExecuting " + name)

    for p, ris in self.__p2ris.iteritems():
        for ri in ris:
            status = pruneName(ri.get_prodstatus(), "")
            status = modifyString(status, "/", "_")
            pname = pruneName(ri.get_name(), "")
            pname = modifyString(pname, "/", "-")
            # NOTE(review): this replace is a no-op as written; it may have
            # been meant to escape "&" for XML. Left untouched because the
            # resulting string is part of the resource id - confirm intent.
            pname = pname.replace("&", "&")

            subid = ri.get_conf() + ri.get_year() + sep + ri.get_passcode()
            rid = subid + sep + ri.get_type() + sep + pname + sep + status

            if rid not in seen_rids:
                seen_rids.add(rid)
                p2ri[p].append(rid)
                rid2ris[rid].append(ri)
            if subid not in seen_subids:
                seen_subids.add(subid)
                p2s[p].append(subid)

    self.set_p_2ris(p2ri)
    self.set_p_2subs(p2s)
    self.set_rid_2ris(rid2ris)
    def createPasscode2AuthIdsAndAffiliationSetAndAuthIds2Affiliations(self,verbose):
        """Build the passcode/author/affiliation lookup tables.

        For every author object in ``self.__p2auths``:

        * the author id is ``shasum`` of the author's e-mail;
        * the affiliation id is ``shasum`` of
          ``pruneName(affiliation name, country code)``.

        An author id is attached to the first passcode under which it is met
        (later occurrences only trigger a warning), every author id collects
        the affiliation id of each of its occurrences, and each affiliation
        id keeps the first affiliation object that produced it.

        The results are handed to ``set_pass_2_sha`` (passcode -> author
        ids), ``set_affiliation_id_2_details`` (affiliation id ->
        affiliation objects) and ``set_sha_2_affiliation`` (author id ->
        affiliation ids).

        verbose: when equal to 1 progress and duplicate warnings are printed.
        """
        name="SerializeAuthor.createPasscode2AuthIdsAndAffiliationSetAndAuthIds2Affiliations"
        p2s=collections.defaultdict(list)           # passcode -> author ids
        s2a=collections.defaultdict(list)           # author id -> affiliation ids
        affiliations=collections.defaultdict(list)  # affiliation id -> affiliation objects
        # Sets give constant-time "already seen?" checks; the former lists
        # made the pass quadratic in the number of authors.
        seen_authors=set()
        seen_affiliations=set()

        if verbose==1:
            print("\tExecuting "+name)

        for p, auths in self.__p2auths.iteritems():
            for auth in auths:
                ida=shasum(auth.get_email())
                affiliation=auth.get_affiliation()
                # Kept in its own variable so the method label in ``name``
                # is no longer overwritten.
                affi_name=affiliation.get_name()
                country=affiliation.get_country_code()
                idaffi=shasum(pruneName(affi_name,country))

                if ida not in seen_authors:
                    seen_authors.add(ida)
                    p2s[p].append(ida)
                elif verbose==1:
                    print("\t\tWARNING Author Element "+ida+ " with email "+auth.get_email()+ " appears many times")

                # Recorded for every occurrence, duplicates included.
                s2a[ida].append(idaffi)

                if idaffi not in seen_affiliations:
                    seen_affiliations.add(idaffi)
                    affiliations[idaffi].append(affiliation)
                elif verbose==1:
                    # Reports the affiliation id; the message used to show
                    # the author id by mistake.
                    # NOTE(review): the concatenation fails if name or
                    # country is None - confirm they are always strings.
                    print("\t\tWARNING Affiliation Element "+idaffi+ " with name "+affi_name+ " and country "+country+ " appears many times")

        self.set_pass_2_sha(p2s)
        self.set_affiliation_id_2_details(affiliations)
        self.set_sha_2_affiliation(s2a)
    def createPasscode2SubsIdsAndSubsId2PapersAndSubsId2Confs(self, verbose):
        """Build the per-passcode submission, paper and conference tables.

        For every passcode in ``self.__p2subs`` and every submission under it:

        * the submission id ``conf + year + "#" + passcode`` is recorded the
          first time it is met;
        * every paper listed for the passcode in ``self.__p2papers`` is
          recorded (id ``shasum(conf + year + pid)`` plus the paper object)
          the first time its id is met;
        * every conference listed for the passcode in ``self.__p2confs``
          contributes ``conf + year`` to the passcode's conference list.

        "First time" is global, i.e. across all passcodes. Papers and
        conferences are only visited for passcodes with at least one
        submission, because both loops run once per submission.

        The results are handed to ``set_pass_2subs``, ``set_pass_2papers``,
        ``set_pass_2confs`` and ``set_pass_2paper_obj``.

        verbose: when equal to 1 a progress line is printed.
        """
        name = "SerializeSubmission.createPasscode2SubsIdsAndSubsId2PapersAndSubsId2Confs"
        p2subs = collections.defaultdict(list)  # passcode -> submission ids
        p2ps = collections.defaultdict(list)    # passcode -> paper ids
        p2cs = collections.defaultdict(list)    # passcode -> conference ids
        p2pobj = collections.defaultdict(list)  # passcode -> paper objects
        # Sets replace the former lists: membership is tested in the
        # innermost loops, where a list lookup is needlessly linear.
        seen_pids = set()
        seen_confids = set()
        seen_subids = set()

        if verbose == 1:
            print("\tExecuting " + name)

        for p, subs in self.__p2subs.iteritems():
            papers = self.__p2papers.get(p)
            confs = self.__p2confs.get(p)

            for s in subs:
                subid = s.get_conf() + s.get_year() + "#" + s.get_passcode()
                if subid not in seen_subids:
                    seen_subids.add(subid)
                    p2subs[p].append(subid)

                if papers is not None:
                    for paper in papers:
                        pid = shasum(str(paper.get_conf()) + str(paper.get_year()) + str(paper.get_pid()))
                        if pid not in seen_pids:
                            seen_pids.add(pid)
                            p2ps[p].append(pid)
                            p2pobj[p].append(paper)

                if confs is not None:
                    for conf in confs:
                        idc = conf.get_conf()
                        idy = conf.get_year()
                        idx = utils.pruneName(idc + idy, " ")
                        seen_confids.add(idx)
                        # NOTE(review): unlike papers, the conference id is
                        # appended on every pass (once per submission and per
                        # conference), un-pruned, and regardless of
                        # ``seen_confids``. Behaviour kept as found; confirm
                        # whether de-duplication was intended here.
                        p2cs[p].append(idc + idy)

        self.set_pass_2subs(p2subs)
        self.set_pass_2papers(p2ps)
        self.set_pass_2confs(p2cs)
        self.set_pass_2paper_obj(p2pobj)
    def serializeResourcesIntoManyFile(self,verbose):
        """Write one RDF/XML file per resource plus an index of all resources.

        For every resource id found through ``get_p_2ris()`` /
        ``get_rid_2ris()`` the header file is copied to
        ``<outfile>/<RES><h[:1]>/<h[0:3]>/<h>`` (``h = shasum(rid)``) and the
        following is appended to it:

        * the ``owl:NamedIndividual`` of the resource with its type,
          availability, status, modality, use, name and the submissions of
          the owning passcode (``get_p_2subs()``);
        * the declarations of every resource type, name, availability,
          status, modality and use that is not one of the predefined values
          (``__RTYPES__``, ``__RAVAILS__``, ``__RSTATUS__``, ``__RMODS__``,
          ``__RUSES__``).

        A ``rdf:Description`` with a human readable label is appended to the
        index file ``<outfile><INDEXOFRES>`` for every resource.

        Differences from the previous revision (emitted text is unchanged):

        * the index file and every resource file are closed explicitly;
        * the "not yet encountered" warning honours the ``verbose``
          argument instead of reading ``self.verbose``;
        * the output directory is created with a single check on the full
          path; the former first check tested one spelling of the path and
          created another, and ``os.makedirs`` builds intermediate
          directories anyway.

        verbose: when equal to 1 progress and warning lines are printed.
        """
        name ="SerializeResource.serializeResourcesIntoManyFile"
        # NOTE(review): this table is rebuilt for every resource id, so the
        # value passed to set_newrtype_num_of_instances() at the end only
        # reflects the last resource id processed. Kept as found - confirm
        # whether a global count was intended.
        newrtype =collections.defaultdict(set)

        if (verbose==1):
            print("\t\tExecuting "+name)

        idxfile=self.outfile+self.__INDEXOFRES__
        copy2(self.headerfile, idxfile)
        iFile=codecs.open(idxfile,'a','utf-8')
        try:
            iFile.write((self.__STARTRES__+"\n").encode('utf-8'))

            for p, rids in self.get_p_2ris().iteritems():
                for rid in rids:
                    # Bookkeeping is per resource id: each output file
                    # declares only what that resource needs.
                    rtype_subclassed =set()
                    newrtype =collections.defaultdict(set)
                    names_serialized =set()
                    avails_serialized =set()
                    status_serialized =set()
                    mods_serialized =set()
                    uses_serialized =set()

                    ris=self.get_rid_2ris().get(rid)
                    if not ris:
                        continue

                    # Files are sharded by the first one and three
                    # characters of the hashed resource id.
                    rid1=shasum(rid)
                    append_res=self.__RES__+str(rid1[:1]) + "/" + str(rid1[0:3])
                    target_dir=self.outfile+"/"+append_res
                    if not os.path.exists(target_dir):
                        os.makedirs(target_dir)
                    dst=target_dir+"/"+rid1

                    for ri in ris:
                        # --- entry in the index file ---
                        # NOTE(review): the index IRI carries an extra "/"
                        # after "&ri;" that the resource file's IRI does not
                        # have - confirm this is intended.
                        iLine=self.__t1+"<rdf:Description rdf:about=\"&ri;"+"/"+append_res+"/"+str(rid1)+"\">"
                        iFile.write((iLine+"\n").encode('utf-8'))
                        label="Conference: "+ri.get_conf()+ " and "
                        label=label+"Year: "+ri.get_year()+" and "
                        label=label+"Passcode: "+ri.get_passcode()+" and "
                        label=label+"Type: "+ri.get_type()+" and "
                        label=label+"Name: "+ri.get_name().decode(encoding='UTF-8',errors='strict')+" "
                        # The label holds decoded text, so it is written
                        # without the extra .encode() used elsewhere.
                        iLine=self.__t2+"<rdfs:label>"+label+"</rdfs:label>"
                        iFile.write((iLine+"\n"))
                        iLine=self.__t1+"</rdf:Description>"
                        iFile.write((iLine+"\n"))

                        # --- resource type: predefined or new subclass ---
                        rtype=pruneName(ri.get_type(), "")
                        rtype=modifyString(rtype,"/","-")
                        rtype=rtype.replace("&", "&amp;")
                        if rtype in self.__RTYPES__:
                            ns="&lremap;"
                        else:
                            ns="&ri;#"
                            if rtype not in rtype_subclassed:
                                rtype_subclassed.add(rtype)
                                newrtype[rtype].add(1)
                            else:
                                # Replace the single stored counter by its
                                # successor.
                                nrc=newrtype.get(rtype)
                                if nrc is not None:
                                    for num in nrc:
                                        del newrtype[rtype]
                                        newrtype[rtype].add(num+1)
                                elif verbose==1:
                                    print("\t\t Warning resource type "+rtype+ " NOT YET ENCOUNTERED")

                        # --- the resource file itself ---
                        copy2(self.headerfile, dst)
                        oFile=codecs.open(dst,'a','utf-8')
                        try:
                            line=self.__t1+"<owl:NamedIndividual rdf:about=\"&ri;"+append_res+"/"+str(rid1)+ "\">"
                            oFile.write((line + "\n").encode('utf-8'))
                            line=self.__t2+"<rdf:type rdf:resource=\""+ns+rtype+"\"/>"
                            oFile.write((line + "\n").encode('utf-8'))

                            rname=pruneName(ri.get_name(), "")
                            rname=modifyString(rname,"^","-")
                            rname=rname.replace("&", "&amp;")
                            names_serialized.add(rname)

                            avail=ri.get_avail()
                            if avail is not None:
                                self._writeResourceFeature(oFile, avail, self.__RAVAILS__, avails_serialized, "hasResourceAvailability", append_res)

                            status=ri.get_prodstatus()
                            if (status is not None) and (status != "NoStatus"):
                                self._writeResourceFeature(oFile, status, self.__RSTATUS__, status_serialized, "hasResourceStatus", append_res)

                            mod=ri.get_modality()
                            if mod is not None:
                                self._writeResourceFeature(oFile, mod, self.__RMODS__, mods_serialized, "hasResourceModality", append_res)

                            use=ri.get_resourceusage()
                            if use is not None:
                                self._writeResourceFeature(oFile, use, self.__RUSES__, uses_serialized, "hasResourceUse", append_res)

                            # The name is decoded, hence written as is.
                            line=self.__t2+"<lremap:hasResourceName rdf:resource=\"&lremap;"+rname.decode(encoding='UTF-8',errors='strict')+ "\"/>"
                            oFile.write(line+"\n")

                            # Submissions of the passcode owning the resource.
                            pids=self.get_p_2subs().get(p)
                            if pids is not None:
                                for pid in pids:
                                    line=self.__t2+"<dcterms:references rdf:resource=\"&sub;#"+str(pid)+"\"/>"
                                    oFile.write((line + "\n").encode('utf-8'))

                            line=self.__t1+"</owl:NamedIndividual>"
                            oFile.write((line + "\n").encode('utf-8'))
                            oFile.write((self.__ENDRES__ + "\n").encode('utf-8'))

                            # --- classes for resource types that are new ---
                            oFile.write((self.__STARTRESCLS__).encode('utf-8'))
                            for rc in rtype_subclassed:
                                line=self.__t1+"<owl:Class rdf:about=\"&ri;"+append_res+"/"+rc+ "\">"
                                oFile.write((line + "\n").encode('utf-8'))
                                line=self.__t2+"<rdfs:subClassOf rdf:resource=\"&lremap;ResourceType\"/>"
                                oFile.write((line + "\n").encode('utf-8'))
                                line=self.__t1+"</owl:Class>"
                                oFile.write((line + "\n").encode('utf-8'))
                            oFile.write((self.__ENDRESCLS__).encode('utf-8'))

                            # --- resource name individuals ---
                            for declared_name in names_serialized:
                                oFile.write((self.__STARTRESNAMES__).encode('utf-8'))
                                line=self.__t1+"<owl:NamedIndividual rdf:about=\"&lremap;"+append_res+"/"+declared_name.decode(encoding='UTF-8',errors='strict')+ "\">"
                                oFile.write(line+"\n")
                                if declared_name!="-":
                                    line=self.__t2+"<rdf:type rdf:resource=\"&lremap;ResourceName\"/>"
                                else:
                                    line=self.__t2+"<rdf:type rdf:resource=\"&lremap;NoName\"/>"
                                oFile.write((line + "\n").encode('utf-8'))
                                line=self.__t1+"</owl:NamedIndividual>"
                                oFile.write((line + "\n").encode('utf-8'))
                                oFile.write((self.__ENDRESNAMES__).encode('utf-8'))

                            # --- individuals for non-predefined values ---
                            self._writeFeatureIndividuals(oFile, avails_serialized, self.__STARTRESAVAILS__, self.__ENDRESAVAILS__, "ResourceAvailability", append_res, "\n")
                            self._writeFeatureIndividuals(oFile, status_serialized, self.__STARTRESSTATUS__, self.__ENDRESSTATUS__, "ResourceStatus", append_res, "\n\n")
                            self._writeFeatureIndividuals(oFile, mods_serialized, self.__STARTRESMODS__, self.__ENDRESMODS__, "ResourceModality", append_res, "\n\n")
                            self._writeFeatureIndividuals(oFile, uses_serialized, self.__STARTRESUSES__, self.__ENDRESUSES__, "ResourceUse", append_res, "\n\n")

                            oFile.write((self.__CLOSELINE__ + "\n").encode('utf-8'))
                        finally:
                            oFile.close()

            iFile.write((self.__CLOSELINE__).encode('utf-8'))
        finally:
            iFile.close()

        #other stuff
        self.set_newrtype_num_of_instances(newrtype)

    def _writeResourceFeature(self, oFile, raw, predefined, declared, prop, append_res):
        """Append one ``lremap:<prop>`` reference to an open resource file.

        ``raw`` is normalised with ``pruneName``/``modifyString`` and its
        "&" escaped. A predefined value is referenced in the ``&lremap;``
        namespace; any other value is referenced under ``&ri;<append_res>/``
        and remembered in ``declared`` so that it gets declared later.
        """
        value=pruneName(raw, "")
        value=modifyString(value,"^","-")
        value=value.replace("&", "&amp;")
        if value in predefined:
            target="&lremap;"+value
        else:
            declared.add(value)
            target="&ri;"+append_res+"/"+value
        line=self.__t2+"<lremap:"+prop+" rdf:resource=\""+target+ "\"/>"
        oFile.write((line + "\n").encode('utf-8'))

    def _writeFeatureIndividuals(self, oFile, values, start, end, rdf_type, append_res, tail):
        """Declare every value of ``values`` as an ``&lremap;<rdf_type>``.

        Each declaration is wrapped in the ``start``/``end`` markers;
        ``tail`` is the line ending written after the closing tag.
        """
        for value in values:
            oFile.write((start).encode('utf-8'))
            line=self.__t1+"<owl:NamedIndividual rdf:about=\"&ri;"+append_res+"/"+value+ "\">"
            oFile.write((line + "\n").encode('utf-8'))
            line=self.__t2+"<rdf:type rdf:resource=\"&lremap;"+rdf_type+"\"/>"
            oFile.write((line + "\n").encode('utf-8'))
            line=self.__t1+"</owl:NamedIndividual>"
            oFile.write((line + tail).encode('utf-8'))
            oFile.write((end).encode('utf-8'))
# Example #5
# 0
    def serializePapersAndAuthorListInManyFiles(self,verbose):
        """Write one RDF/XML file per paper and per paper status, with indexes.

        Every distinct paper of ``self.__p2papers`` (identified by
        ``shasum(conf + year + pid)``) gets its own file
        ``<outfile>/<PAPERS><h[:1]>/<h[0:3]>/<h>`` holding its type, title,
        status reference and author list, and an ``rdf:Description`` entry
        in the paper index ``<outfile><INDEXOFPAPERS>``. Afterwards every
        distinct status met gets its own file under ``<STATUS>`` and an
        entry in the status index ``<outfile>/<INDEXOFSTATUS>``.

        Differences from the previous revision:

        * index, paper and status files are closed explicitly;
        * the ``bibo:status`` reference is written for every paper, not only
          for the first paper carrying a given status;
        * a paper without an entry in ``get_paper_2auths()`` yields an empty
          author list instead of raising ``TypeError``;
        * a repeated paper id is reported as a warning when ``verbose`` is
          1 instead of being printed unconditionally;
        * output directories are created with a single check on the full
          path (the former first check tested one spelling of the path and
          created another);
        * the never-filled "authors already written" set and its
          unreachable warning branch are gone: authors are written for every
          paper that lists them, exactly as before.

        verbose: when equal to 1 progress and warning lines are printed.
        """
        name ="SerializePaper.serializePapersAndAuthorListInManyFiles"
        paper_serialized =set()
        paper_status_serialized =set()

        if (verbose==1):
            print("\t\tExecuting "+name)

        # --- papers and their index ---
        idxfile=self.outfile+self.__INDEXOFPAPERS__
        copy2(self.headerfile, idxfile)
        iFile=codecs.open(idxfile,'a','utf-8')
        try:
            iFile.write((self.__STARTPAPER__+"\n").encode('utf-8'))
            for papers in self.__p2papers.itervalues():
                for paper in papers:
                    pid=str(paper.get_conf())+str(paper.get_year())+str(paper.get_pid())
                    pid=shasum(pid)
                    if pid in paper_serialized:
                        if (verbose==1):
                            print("\t\tWARNING Paper Element "+pid+ " appears many times")
                        continue
                    paper_serialized.add(pid)
                    paper_status_serialized.add(self._writePaperFile(iFile, paper, pid))
            iFile.write((self.__ENDPAPER__+"\n").encode('utf-8'))
            iFile.write((self.__CLOSELINE__ + "\n").encode('utf-8'))
        finally:
            iFile.close()

        # --- statuses and their index ---
        idxfile=self.outfile+"/"+self.__INDEXOFSTATUS__
        copy2(self.headerfile, idxfile)
        iFile=codecs.open(idxfile,'a','utf-8')
        try:
            iFile.write((self.__STARTSTATUS__+"\n").encode('utf-8'))
            for s in paper_status_serialized:
                self._writeStatusFile(iFile, s)
            iFile.write((self.__ENDSTATUS__ + "\n").encode('utf-8'))
            iFile.write((self.__CLOSELINE__ + "\n").encode('utf-8'))
        finally:
            iFile.close()

    def _writePaperFile(self, iFile, paper, pid):
        """Write the file and the index entry of one paper.

        ``iFile`` is the open paper index and ``pid`` the hashed paper id.
        Returns the paper status after ``pruneName`` so the caller can
        collect the distinct statuses.
        """
        # Files are sharded by the first one and three characters of the id.
        append_paper=self.__PAPERS__+str(pid[:1]) + "/" + str(pid[0:3])
        paper_dir=self.outfile+"/"+append_paper
        if not os.path.exists(paper_dir):
            os.makedirs(paper_dir)

        title=paper.get_title()
        title=title.replace("&", "&amp;")

        # entry in the index file
        iLine=self.__t1+"<rdf:Description rdf:about=\"&paper;"+append_paper+"/"+str(pid)+"\">"
        iFile.write((iLine+"\n").encode('utf-8'))
        iLine=self.__t2+"<dc:title>"+title+"</dc:title>"
        iFile.write((iLine+"\n").encode('utf-8'))
        iLine=self.__t1+"</rdf:Description>"
        iFile.write((iLine+"\n").encode('utf-8'))

        status=pruneName(paper.get_status(),"")

        dst=paper_dir+"/"+pid
        copy2(self.headerfile, dst)
        oFile=codecs.open(dst,'a','utf-8')
        try:
            line=self.__t1+"<owl:NamedIndividual rdf:about=\"&paper;"+append_paper+"/"+str(pid)+"\">"
            oFile.write((line+"\n").encode('utf-8'))
            line=self.__t2+"<rdf:type rdf:resource=\"&bibo;Article\"/>"
            oFile.write((line+"\n").encode('utf-8'))
            line=self.__t2+"<dc:title>"+title+"</dc:title>"
            oFile.write((line+"\n").encode('utf-8'))

            # Reference to the status, written for every paper.
            ser=shasum(status)
            append_status=self.__STATUS__+str(ser[:1]) + "/" + str(ser[0:3])
            line= self.__t2+"<bibo:status rdf:resource=\"&paper;"+append_status+"/"+ser+"\"/>"
            oFile.write((line+"\n").encode('utf-8'))

            # Author list. Each author key is split on "#": the third field
            # is used as the author id, the second and first form the label.
            auths=self.get_paper_2auths()
            if len(auths)>0:
                line=self.__t2+"<bibo:authorList rdf:parseType=\"Collection\">"
                oFile.write((line+"\n").encode('utf-8'))
                for a in auths.get(pid) or ():
                    fields=a.split("#")
                    ida=fields[2]
                    append_auth=self.__AUTHS__+str(ida[:1]) + "/" + str(ida[0:3])+"/"
                    line=self.__t3+"<rdf:Description rdf:about=\"&auth;"+append_auth+ida+"\">"
                    oFile.write((line+"\n").encode('utf-8'))
                    line=self.__t4+"<rdfs:label>"+fields[1]+ " "+fields[0]+"</rdfs:label>"
                    oFile.write((line+"\n").encode('utf-8'))
                    line=self.__t3+"</rdf:Description>"
                    oFile.write((line+"\n").encode('utf-8'))
                line=self.__t2+"</bibo:authorList>"
                oFile.write((line+"\n").encode('utf-8'))

            line = self.__t1 + "</owl:NamedIndividual>"
            oFile.write((line + "\n").encode('utf-8'))
            oFile.write((self.__CLOSELINE__ + "\n").encode('utf-8'))
        finally:
            oFile.close()
        return status

    def _writeStatusFile(self, iFile, s):
        """Write the file and the index entry of the paper status ``s``."""
        ser=shasum(s)
        append_status=self.__STATUS__+str(ser[:1]) + "/" + str(ser[0:3])
        status_dir=self.outfile+"/"+append_status
        if not os.path.exists(status_dir):
            os.makedirs(status_dir)

        # entry in the index file
        iLine=self.__t1+"<rdf:Description rdf:about=\"&paper;"+append_status+"/"+str(ser)+"\"></rdf:Description>"
        iFile.write((iLine+"\n").encode('utf-8'))

        dst=status_dir+"/"+ser
        copy2(self.headerfile, dst)
        oFile=codecs.open(dst,'a','utf-8')
        try:
            # NOTE(review): the individual is named "&paper;#<hash>" while
            # papers and the index refer to "&paper;<STATUS>.../<hash>" -
            # kept as found, confirm the IRIs are meant to differ.
            line=self.__t1+"<owl:NamedIndividual rdf:about=\"&paper;#"+ser+"\">"
            oFile.write((line + "\n").encode('utf-8'))
            line=self.__t2+"<rdf:type rdf:resource=\"&paper;#status\"/>"
            oFile.write((line + "\n").encode('utf-8'))
            line=self.__t2+"<rdfs:label>"+s+"</rdfs:label>"
            oFile.write((line + "\n").encode('utf-8'))
            line=self.__t1+"</owl:NamedIndividual>"
            oFile.write((line + "\n").encode('utf-8'))
            oFile.write((self.__CLOSELINE__ + "\n").encode('utf-8'))
        finally:
            oFile.close()
# Example #6
# 0
    def serializePapersAndAuthorListIntoSigleFile(self,verbose):
        '''
        Append the paper section and the status section to self.outfile.

        For every paper reachable through self.__p2papers (each paper id is
        written once) an owl:NamedIndividual is emitted with its type, title,
        status reference and author list. After the papers, one
        owl:NamedIndividual is emitted per distinct status that was met.

        verbose -- when equal to 1, progress/warning messages are printed.

        Returns nothing; the only effect is the text appended to self.outfile.
        '''
        name = "SerializePaper.serializePapersAndAuthorListIntoSigleFile"
        paper_serialized = set()
        # NOTE(review): nothing is ever added to this set, so the duplicate
        # author warning below never fires. Kept as-is because the intended
        # scope (per paper or per file) cannot be told from this method.
        paper_auth_serialized = set()
        paper_status_serialized = set()

        if verbose == 1:
            print("\t\tExecuting " + name)

        t1 = self.__t1
        t2 = self.__t2
        t3 = self.__t3
        t4 = self.__t4

        # The codecs writer already encodes to UTF-8: text is handed over
        # as-is (pre-encoding it breaks on non-ASCII characters), and the
        # "with" block guarantees the file is closed even on error.
        with codecs.open(self.outfile, 'a', 'utf-8') as oFile:

            def emit(text):
                # Write one line followed by a newline.
                oFile.write(text + "\n")

            oFile.write(self.__STARTPAPER__)
            for papers in self.__p2papers.values():
                for paper in papers:
                    # The paper id is the shasum of conf + year + pid.
                    pid = shasum(str(paper.get_conf()) + str(paper.get_year()) + str(paper.get_pid()))
                    if pid in paper_serialized:
                        continue
                    paper_serialized.add(pid)

                    # <owl:NamedIndividual rdf:about="&paper;#...">
                    emit(t1 + "<owl:NamedIndividual rdf:about=\"&paper;#" + str(pid) + "\">")
                    # <rdf:type rdf:resource="&bibo;Article"/>
                    emit(t2 + "<rdf:type rdf:resource=\"&bibo;Article\"/>")

                    # Title, with ampersands escaped for XML.
                    title = paper.get_title().replace("&", "&amp;")
                    emit(t2 + "<dc:title>" + title + "</dc:title>")

                    # Every paper links to its status; the set only keeps the
                    # distinct statuses for the status section written below.
                    status = pruneName(paper.get_status(), "")
                    paper_status_serialized.add(status)
                    emit(t2 + "<bibo:status rdf:resource=\"&paper;#" + shasum(status) + "\"/>")

                    # Author list of this paper.
                    auths = self.get_paper_2auths()
                    if len(auths) > 0:
                        emit(t2 + "<bibo:authorList rdf:parseType=\"Collection\">")
                        # A paper missing from the mapping has no authors.
                        for a in (auths.get(pid) or ()):
                            if a in paper_auth_serialized:
                                if verbose == 1:
                                    print("\t\tWARNING Author Element " + a + "  appears many times")
                                continue
                            # The author key is "#"-separated: field 2 is the
                            # resource id, fields 1 and 0 make up the label.
                            parts = a.split("#")
                            emit(t3 + "<rdf:Description rdf:about=\"&auth;#" + parts[2] + "\">")
                            emit(t4 + "<rdfs:label>" + parts[1] + " " + parts[0] + "</rdfs:label>")
                            emit(t3 + "</rdf:Description>")
                        emit(t2 + "</bibo:authorList>")

                    # Close paper </owl:NamedIndividual>
                    emit(t1 + "</owl:NamedIndividual>")

            emit(self.__ENDPAPER__)
            emit(self.__STARTSTATUS__)

            # One named individual per distinct status, e.g.
            # <owl:NamedIndividual rdf:about="&paper;#<shasum>">
            #     <rdf:type rdf:resource="&paper;#status"/>
            #     <rdfs:label>undecided</rdfs:label>
            # </owl:NamedIndividual>
            for s in paper_status_serialized:
                emit(t1 + "<owl:NamedIndividual rdf:about=\"&paper;#" + shasum(s) + "\">")
                emit(t2 + "<rdf:type rdf:resource=\"&paper;#status\"/>")
                emit(t2 + "<rdfs:label>" + s + "</rdfs:label>")
                emit(t1 + "</owl:NamedIndividual>")

            emit(self.__ENDSTATUS__)
            emit(self.__CLOSELINE__)

    def serializeConferenceIntoSigleFile(self, verbose):
        """Append the conference section and the location list to self.outfile.

        Every conference reachable through self.__p2confs is written once
        (deduplicated on the pruned "<conf>#<year>" key) as an
        owl:NamedIndividual carrying its label, event type, year, location
        and, for workshops (type "WS"), the event it is a sub-event of.
        Afterwards one owl:NamedIndividual is written per distinct location.

        verbose -- when equal to 1, a progress message is printed.

        Returns nothing; the only effect is the text appended to self.outfile.
        """
        name = "SerializeConference.serializeConferenceIntoSigleFile"
        conf_serialized = set()
        location_serialized = set()

        if verbose == 1:
            print("\t\tExecuting " + name)

        t1 = self.__t1
        t2 = self.__t2

        # The codecs writer already encodes to UTF-8: text is handed over
        # as-is (pre-encoding it breaks on non-ASCII characters), and the
        # "with" block guarantees the file is closed even on error.
        with codecs.open(self.outfile, "a", "utf-8") as oFile:

            def emit(text):
                # Write one line followed by a newline.
                oFile.write(text + "\n")

            oFile.write(self.__STARTCONF__)

            for confs in self.__p2confs.values():
                for conf in confs:
                    idc = conf.get_conf()
                    idy = conf.get_year()
                    ty = conf.get_ty()
                    se = conf.get_subevent()
                    location = conf.get_location()

                    idx = utils.pruneName(idc + "#" + idy, " ")
                    if idx in conf_serialized:
                        continue
                    conf_serialized.add(idx)
                    location_serialized.add(location)

                    # Example of the element written for each conference:
                    # <owl:NamedIndividual rdf:about="&swc;C1">
                    #     <rdf:type rdf:resource="&swc;ConferenceEvent"/>
                    #     <hasLocation rdf:resource="&swc;Reykjavik"/>
                    # </owl:NamedIndividual>
                    if ty == "WS":
                        event = "WorkshopEvent"
                        subevent = '<swc:isSubEventOf rdf:resource="&swc;#' + se + '"/>'
                    else:
                        event = "ConferenceEvent"
                        subevent = ""

                    emit(t1 + '<owl:NamedIndividual rdf:about="&swc;#' + idc + idy + '">')
                    emit(t2 + "<rdfs:label>" + idc + " " + idy + "</rdfs:label>")
                    emit(t2 + '<rdf:type rdf:resource="&swc;#' + event + '"/>')
                    emit(t2 + '<tl:atYear rdf:datatype="&xsd;#gYear">' + idy + "</tl:atYear>")
                    emit(t2 + '<swc:hasLocation rdf:resource="&swc;#' + location + '"/>')
                    if subevent != "":
                        emit(t2 + subevent)
                    emit(t1 + "</owl:NamedIndividual>")

            emit(self.__ENDCONF__)

            # One spatial individual per distinct location:
            # <owl:NamedIndividual rdf:about="&swc;#<location>">
            #     <rdf:type rdf:resource="&geo;#SpatialThing"/>
            # </owl:NamedIndividual>
            oFile.write(self.__STARTLOCLIST__)
            for y in location_serialized:
                emit(t1 + '<owl:NamedIndividual rdf:about="&swc;#' + y + '">')
                emit(t2 + '<rdf:type rdf:resource="&geo;#SpatialThing"/>')
                emit(t1 + "</owl:NamedIndividual>")

            oFile.write(self.__ENDLOCLIST__)
            emit(self.__CLOSELINE__)