def createPasscode2AuthIdsAndAffiliationSetAndAuthIds2Affiliations(self, verbose):
    """Build the author/affiliation lookup tables from self.__p2auths.

    For every (passcode, authors) pair this fills:
      * passcode -> [author sha1 ids]          (stored via set_pass_2_sha)
      * author sha1 id -> [affiliation ids]    (stored via set_sha_2_affiliation)
      * affiliation id -> [affiliation objs]   (stored via set_affiliation_id_2_details)

    Author ids are sha1 sums of the author's e-mail; affiliation ids are
    sha1 sums of pruneName(name, country).  verbose==1 enables trace output.
    """
    tag = "SerializeAuthor.createPasscode2AuthIdsAndAffiliationSetAndAuthIds2Affiliations"
    p2s = collections.defaultdict(list)
    s2a = collections.defaultdict(list)
    affiliations = collections.defaultdict(list)
    # sets instead of lists: membership tests are O(1) instead of O(n)
    seen_ida = set()
    seen_idaffi = set()
    if verbose == 1:
        print("\tExecuting " + tag)
    for p, auths in self.__p2auths.items():
        for auth in auths:
            # passcode to sha mapping
            ida = shasum(auth.get_email())
            affiliation = auth.get_affiliation()
            # BUG FIX: the original reused `name`, clobbering the function tag
            # with the affiliation name; use a dedicated local instead.
            affi_name = affiliation.get_name()
            country = affiliation.get_country_code()
            idaffi = shasum(pruneName(affi_name, country))
            if ida not in seen_ida:
                seen_ida.add(ida)
                p2s[p].append(ida)
            elif verbose == 1:
                print("\t\tWARNING Author Element " + ida + " with email " + auth.get_email() + " appears many times")
            # affiliations: every occurrence is recorded in s2a (deduping is
            # done at serialization time), details are stored only once.
            s2a[ida].append(idaffi)
            if idaffi not in seen_idaffi:
                seen_idaffi.add(idaffi)
                affiliations[idaffi].append(affiliation)
            elif verbose == 1:
                # BUG FIX: str() guards the trace against a None name/country
                # (the original crashed concatenating None).
                print("\t\tWARNING Affiliation Element " + ida + " with name " + str(affi_name) + " and country " + str(country) + " appears many times")
    self.set_pass_2_sha(p2s)
    self.set_affiliation_id_2_details(affiliations)
    self.set_sha_2_affiliation(s2a)
def createPasscode2PaperIdsAndPaperIds2Authors(self, verbose):
    """Build passcode -> paper-id and paper-id -> author-entry tables.

    Paper ids are sha1 sums of conf+year+pid.  For each paper the author
    entries are "firstname#lastname#<author sha1>" strings (this format is
    split on '#' by the serialization code).  Results are stored via
    set_paper_2auths() and set_pass_2_shap().  verbose==1 enables traces.
    """
    tag = "SerializePaper.createPasscode2PaperIdsAndPaperIds2Authors"
    p2pid = collections.defaultdict(list)   # passcode -> paper sha1 ids
    p2as = collections.defaultdict(list)    # paper sha1 id -> author entries
    # BUG FIX: the original declared dedup lists but never appended to them,
    # so the duplicate checks below could never fire.  Sets also make the
    # membership tests O(1).
    seen_pid = set()
    seen_paper_auth = set()                 # (pid, author sha1) pairs
    if verbose == 1:
        print("\tExecuting " + tag)
    for p, papers in self.__p2papers.items():
        for paper in papers:
            pid = shasum(str(paper.get_conf()) + str(paper.get_year()) + str(paper.get_pid()))
            if pid not in seen_pid:
                seen_pid.add(pid)
                p2pid[p].append(pid)
            elif verbose == 1:
                print("\t\tWARNING Paper Element " + pid + " appears many times")
            # managing authors
            auths = self.__p2auths.get(p)
            if auths is not None:
                if len(auths) == 0:
                    if verbose == 1:
                        print("\t\tWARNING Paper Element " + pid + " has No author")
                else:
                    for a in auths:
                        ida = shasum(a.get_email())
                        # dedup per paper: the same author must not be listed
                        # twice on one paper, but may appear on other papers
                        if (pid, ida) not in seen_paper_auth:
                            seen_paper_auth.add((pid, ida))
                            p2as[pid].append(a.get_firstname() + "#" + a.get_lastname() + "#" + ida)
    self.set_paper_2auths(p2as)
    self.set_pass_2_shap(p2pid)
def createPasscode2SubsIdsAndSubsId2PapersAndSubsId2Confs(self, verbose):
    """Populate the passcode -> submissions / papers / conferences maps.

    For every passcode: collect submission ids (conf+year+"#"+passcode),
    sha1 paper ids (with the matching paper objects), and conference ids
    (conf+year, deduped through utils.pruneName).  Each id is recorded at
    most once globally.  Results are handed to the set_pass_2* setters.
    """
    tag = "SerializeSubmission.createPasscode2SubsIdsAndSubsId2PapersAndSubsId2Confs"
    pass2subids = collections.defaultdict(list)
    pass2paperids = collections.defaultdict(list)
    pass2confids = collections.defaultdict(list)
    pass2paperobjs = collections.defaultdict(list)
    seen_subids = set()
    seen_paperids = set()
    seen_confids = set()
    if verbose == 1:
        print("\tExecuting " + tag)
    for passcode, submissions in self.__p2subs.items():
        paper_list = self.__p2papers.get(passcode)
        conf_list = self.__p2confs.get(passcode)
        for submission in submissions:
            sub_key = submission.get_conf() + submission.get_year() + "#" + submission.get_passcode()
            if sub_key not in seen_subids:
                seen_subids.add(sub_key)
                pass2subids[passcode].append(sub_key)
            if paper_list is not None:
                for paper_obj in paper_list:
                    digest = shasum(str(paper_obj.get_conf()) + str(paper_obj.get_year()) + str(paper_obj.get_pid()))
                    if digest not in seen_paperids:
                        seen_paperids.add(digest)
                        pass2paperids[passcode].append(digest)
                        pass2paperobjs[passcode].append(paper_obj)
            if conf_list is not None:
                for conf_obj in conf_list:
                    conf_name = conf_obj.get_conf()
                    conf_year = conf_obj.get_year()
                    pruned = utils.pruneName(conf_name + conf_year, " ")
                    if pruned not in seen_confids:
                        seen_confids.add(pruned)
                        pass2confids[passcode].append(conf_name + conf_year)
    self.set_pass_2subs(pass2subids)
    self.set_pass_2papers(pass2paperids)
    self.set_pass_2confs(pass2confids)
    self.set_pass_2paper_obj(pass2paperobjs)
def upload_release(version, mobilesdk_zips):
    """Upload each mobile SDK zip for *version* and build the release index.

    For every archive: upload it via upload_mobilesdk(), parse the
    "mobilesdk-<version>-<os>.zip" file name, read the embedded version.txt
    with read_properties(), and enrich the parsed properties with the
    archive's sha1 checksum and byte size.

    Returns the list of per-SDK metadata dicts.
    Raises ValueError if an archive name does not match the expected pattern.
    """
    release_index = []
    for mobilesdk_zip in mobilesdk_zips:
        upload_mobilesdk(version, mobilesdk_zip)
        sdk_name = os.path.basename(mobilesdk_zip)
        # BUG FIX: escape the '.' before "zip" -- the unescaped dot matched
        # any character (e.g. "fooXzip").
        matches = re.match(r"mobilesdk-([^-]+)-([^\.]+)\.zip", sdk_name)
        if matches is None:
            # fail with a clear message instead of an AttributeError below
            raise ValueError("unexpected SDK archive name: %s" % sdk_name)
        sdk_version = matches.group(1)
        sdk_os = matches.group(2)
        # context manager guarantees the archive is closed even if the
        # read/parse fails (the original leaked the handle on error)
        with zipfile.ZipFile(mobilesdk_zip) as zip_data:
            sdk_data = read_properties(zip_data.read("mobilesdk/%s/%s/version.txt" % (sdk_os, sdk_version)))
        sdk_data["sha1"] = utils.shasum(mobilesdk_zip)
        sdk_data["filesize"] = os.path.getsize(mobilesdk_zip)
        release_index.append(sdk_data)
    # BUG FIX: the index was built but never handed back; returning it is
    # backward compatible (the original implicitly returned None).
    return release_index
def upload_release(version, mobilesdk_zips):
    """Upload every SDK archive for *version* and collect its metadata.

    Each archive is pushed with upload_mobilesdk(); its file name encodes
    the SDK version and target OS, which locate the bundled version.txt
    parsed via read_properties().  The archive's sha1 checksum and size are
    recorded alongside the parsed properties in the accumulated index.
    """
    index_entries = []
    for archive_path in mobilesdk_zips:
        upload_mobilesdk(version, archive_path)
        base = os.path.basename(archive_path)
        parsed = re.match(r"mobilesdk-([^-]+)-([^\.]+).zip", base)
        sdk_ver, sdk_platform = parsed.group(1), parsed.group(2)
        bundle = zipfile.ZipFile(archive_path)
        entry = read_properties(bundle.read("mobilesdk/%s/%s/version.txt" % (sdk_platform, sdk_ver)))
        bundle.close()
        entry["sha1"] = utils.shasum(archive_path)
        entry["filesize"] = os.path.getsize(archive_path)
        index_entries.append(entry)
# Build-artifact upload script (Python 2): validates CLI arguments, then
# uploads <path> to a bucket under <type>/<branch>/ with git/build metadata.
import simplejson

# Expect exactly five arguments after the program name.
if len(sys.argv) != 6:
    print "Usage: %s <desktop|mobile> <path> <branch> <revision> <build url>" % sys.argv[0]
    sys.exit(1)

(type, path, branch, revision, build_url) = sys.argv[1:]  # NOTE: shadows builtin `type`
cfg = utils.get_build_config()
# AWS credentials must come from the environment or config.json.
if not cfg.verify_aws():
    print "Error: Need both AWS_KEY and AWS_SECRET in the environment or config.json"
    sys.exit(1)
bucket = cfg.open_bucket()
sha1 = utils.shasum(path)
filename = os.path.basename(path)
filesize = os.path.getsize(path)
print 'uploading %s (branch %s / revision %s)...' % (filename, branch, revision)
# Key presumably is an S3 object key (boto-style API) -- confirm against imports.
key = Key(bucket)
key.key = '%s/%s/%s' % (type, branch, filename)
# attach provenance metadata to the uploaded object
key.set_metadata('git_revision', revision)
key.set_metadata('git_branch', branch)
key.set_metadata('build_url', build_url)
key.set_metadata('build_type', type)
key.set_metadata('sha1', sha1)
max_retries = 5
uploaded = False
# NOTE(review): the retry loop body is truncated in this chunk of the file.
for i in range(1, max_retries+1):
def serializeResourcesIntoManyFile(self, verbose):
    """Serialize every resource instance into its own RDF/XML file.

    Writes an index file (self.outfile + __INDEXOFRES__) with one
    rdf:Description per resource, and one per-resource file under a
    two-level sha1-derived folder layout (<first hex char>/<first three
    hex chars>/<sha1 of rid>).  Resource type, availability, status,
    modality and use values are emitted either as known &lremap;
    individuals or as locally minted &ri; individuals.  Per-type instance
    counters are stored via set_newrtype_num_of_instances() at the end.
    """
    name = "SerializeResource.serializeResourcesIntoManyFile"
    # accumulators; NOTE(review): l1_serialized and res_serialized are never used
    rtype_subclassed = set()
    newrtype = collections.defaultdict(set)   # resource type -> {instance count}
    names_serialized = set()
    avails_serialized = set()
    status_serialized = set()
    mods_serialized = set()
    l1_serialized = set()
    uses_serialized = set()
    res_serialized = set()
    append_res = ""
    ns = "&lremap;"
    #print self.get_rid_2ris()
    if (verbose == 1):
        print "\t\tExecuting " + name
    # idx file
    idxfile = self.outfile + self.__INDEXOFRES__
    copy2(self.headerfile, idxfile)
    iFile = codecs.open(idxfile, 'a', 'utf-8')
    iFile.write((self.__STARTRES__ + "\n").encode('utf-8'))
    for p, rids in self.get_p_2ris().iteritems():
        for rid in rids:
            # accumulators are reset for every resource id.
            # NOTE(review): this also resets `newrtype`, so the counters handed
            # to set_newrtype_num_of_instances() below only reflect the last
            # rid processed -- confirm this is intended.
            rtype_subclassed = set()
            newrtype = collections.defaultdict(set)
            names_serialized = set()
            avails_serialized = set()
            status_serialized = set()
            mods_serialized = set()
            l1_serialized = set()
            uses_serialized = set()
            res_serialized = set()
            ''' prepare the structure for new classes '''
            ris = self.get_rid_2ris().get(rid)
            if ris is not None:
                for ri in ris:
                    #print rid + " "+ str(ri)
                    ''' create the description file '''
                    rid1 = shasum(rid)
                    # two-level output layout derived from the sha1 hex digest
                    first_order_folder = str(rid1[:1])
                    second_order_folder = str(rid1[0:3])
                    # NOTE(review): exists() checks a path without the extra "/"
                    # that the first makedirs() inserts -- the two can disagree;
                    # confirm which layout is intended.
                    if not os.path.exists(self.outfile + "/" + self.__RES__ + first_order_folder):
                        os.makedirs(self.outfile + "/" + self.__RES__ + "/" + first_order_folder)
                    if not os.path.exists(self.outfile + "/" + self.__RES__ + first_order_folder + "/" + second_order_folder):
                        os.makedirs(self.outfile + "/" + self.__RES__ + first_order_folder + "/" + second_order_folder)
                    append_res = self.__RES__ + first_order_folder + "/" + second_order_folder
                    # write description file (index entry)
                    iLine = self.__t1 + "<rdf:Description rdf:about=\"&ri;" + "/" + append_res + "/" + str(rid1) + "\">"
                    iFile.write((iLine + "\n").encode('utf-8'))
                    label = "Conference: " + ri.get_conf() + " and "
                    label = label + "Year: " + ri.get_year() + " and "
                    label = label + "Passcode: " + ri.get_passcode() + " and "
                    label = label + "Type: " + ri.get_type() + " and "
                    label = label + "Name: " + ri.get_name().decode(encoding='UTF-8', errors='strict') + " "
                    iLine = self.__t2 + "<rdfs:label>" + label + "</rdfs:label>"
                    #iFile.write((iLine+"\n").encode('utf-8'))
                    # written without .encode because label was decoded to
                    # unicode above (the codecs file encodes on write)
                    iFile.write((iLine + "\n"))
                    iLine = self.__t1 + "</rdf:Description>"
                    #iFile.write((iLine+"\n").encode('utf-8'))
                    iFile.write((iLine + "\n"))
                    # as usual
                    rtype = ri.get_type()
                    #print rid + " "+ str(rtype)
                    rtype = pruneName(rtype, "")
                    #rtype="Language_Resources/Technologies_Infrastructure"
                    rtype = modifyString(rtype, "/", "-")
                    # NOTE(review): no-op replace -- presumably "&" was meant to
                    # become "&amp;" and the target got mangled; confirm upstream.
                    rtype = rtype.replace("&", "&")
                    if not (rtype in self.__RTYPES__):
                        # unknown type: mint it in the &ri; namespace and count instances
                        ns = "&ri;#"
                        if not rtype in rtype_subclassed:
                            #print "SUB CLASS "+ rtype
                            rtype_subclassed.add(rtype)
                            newrtype[rtype].add(1)
                        else:
                            # bump the single counter held inside the set
                            nrc = newrtype.get(rtype)
                            if nrc is not None:
                                for num in nrc:
                                    #print num
                                    del newrtype[rtype]
                                    newrtype[rtype].add(num + 1)
                            else:
                                if (self.verbose == 1):
                                    print "\t\t Warning resource type " + rtype + " NOT YET ENCOUNTERED"
                    else:
                        ns = "&lremap;"
                    '''
                    create the resourcename
                    <owl:NamedIndividual rdf:about="&ri;myname">
                        <rdf:type rdf:resource="&lremap;ResourceName"/>
                    </owl:NamedIndividual>
                    '''
                    dst = self.outfile + "/" + append_res + "/" + rid1
                    copy2(self.headerfile, dst)
                    # NOTE(review): oFile is reopened per resource instance and
                    # never explicitly closed.
                    oFile = codecs.open(dst, 'a', 'utf-8')
                    line = self.__t1 + "<owl:NamedIndividual rdf:about=\"&ri;" + append_res + "/" + str(rid1) + "\">"
                    oFile.write((line + "\n").encode('utf-8'))
                    line = self.__t2 + "<rdf:type rdf:resource=\"" + ns + rtype + "\"/>"
                    oFile.write((line + "\n").encode('utf-8'))
                    rname = ri.get_name()
                    #print rid + " "+ str(rtype)
                    rname = pruneName(rname, "")
                    rname = modifyString(rname, "^", "-")
                    #rname="-"
                    # NOTE(review): no-op replace, see note on rtype above
                    rname = rname.replace("&", "&")
                    if not (rname in names_serialized):
                        names_serialized.add(rname)
                    #avail="Freely_Available_aaa"
                    avail = ri.get_avail()
                    if avail is not None:
                        avail = pruneName(avail, "")
                        avail = modifyString(avail, "^", "-")
                        avail = avail.replace("&", "&")
                        if not (avail in self.__RAVAILS__):
                            # unknown availability: mint a local &ri; individual
                            avails_serialized.add(avail)
                            line = self.__t2 + "<lremap:hasResourceAvailability rdf:resource=\"&ri;" + append_res + "/" + avail + "\"/>"
                        else:
                            line = self.__t2 + "<lremap:hasResourceAvailability rdf:resource=\"&lremap;" + avail + "\"/>"
                        oFile.write((line + "\n").encode('utf-8'))
                    status = ri.get_prodstatus()
                    if (status is not None) and (status != "NoStatus"):
                        status = pruneName(status, "")
                        status = modifyString(status, "^", "-")
                        status = status.replace("&", "&")
                        if not (status in self.__RSTATUS__):
                            status_serialized.add(status)
                            line = self.__t2 + "<lremap:hasResourceStatus rdf:resource=\"&ri;" + append_res + "/" + status + "\"/>"
                        else:
                            line = self.__t2 + "<lremap:hasResourceStatus rdf:resource=\"&lremap;" + status + "\"/>"
                        oFile.write((line + "\n").encode('utf-8'))
                    mod = ri.get_modality()
                    if mod is not None:
                        mod = pruneName(mod, "")
                        mod = modifyString(mod, "^", "-")
                        mod = mod.replace("&", "&")
                        if not (mod in self.__RMODS__):
                            mods_serialized.add(mod)
                            line = self.__t2 + "<lremap:hasResourceModality rdf:resource=\"&ri;" + append_res + "/" + mod + "\"/>"
                        else:
                            line = self.__t2 + "<lremap:hasResourceModality rdf:resource=\"&lremap;" + mod + "\"/>"
                        oFile.write((line + "\n").encode('utf-8'))
                    use = ri.get_resourceusage()
                    if use is not None:
                        use = pruneName(use, "")
                        use = modifyString(use, "^", "-")
                        use = use.replace("&", "&")
                        if not (use in self.__RUSES__):
                            uses_serialized.add(use)
                            line = self.__t2 + "<lremap:hasResourceUse rdf:resource=\"&ri;" + append_res + "/" + use + "\"/>"
                        else:
                            line = self.__t2 + "<lremap:hasResourceUse rdf:resource=\"&lremap;" + use + "\"/>"
                        oFile.write((line + "\n").encode('utf-8'))
                    '''
                    create the individual with all properties
                    <owl:NamedIndividual rdf:about="&ri;MyCorpus">
                        <rdf:type rdf:resource="&lremap;Corpus"/>
                        <lvont:language rdf:resource="&lexvo;id/iso639-3/ita"/>
                        <lremap:hasResourceLanguageType rdf:resource="&lremap;Bi"/>
                        <lremap:hasResourceAvailability rdf:resource="&lremap;Freely_Available"/>
                        <lremap:hasResourceName rdf:resource="&ri;myname"/>
                    </owl:NamedIndividual>
                    '''
                    #line=self.__t2+"<lremap:hasResourceName rdf:resource=\"&ri;"+append_res+"/"+rname+ "\"/>"
                    line = self.__t2 + "<lremap:hasResourceName rdf:resource=\"&lremap;" + rname.decode(encoding='UTF-8', errors='strict') + "\"/>"
                    #oFile.write((line + "\n").encode('utf-8'))
                    oFile.write(line + "\n")
                    # referencing submissions
                    pids = self.get_p_2subs().get(p)
                    if pids is not None:
                        for pid in pids:
                            # NOTE(review): the literal "⊂" looks like a mangled
                            # "&sub;" XML entity -- confirm against upstream.
                            line = self.__t2 + "<dcterms:references rdf:resource=\"⊂#" + str(pid) + "\"/>"
                            oFile.write((line + "\n").encode('utf-8'))
                    # closing individual
                    line = self.__t1 + "</owl:NamedIndividual>"
                    oFile.write((line + "\n").encode('utf-8'))
                    oFile.write((self.__ENDRES__ + "\n").encode('utf-8'))
                    # create subclass
                    oFile.write((self.__STARTRESCLS__).encode('utf-8'))
                    '''
                    create
                    <owl:Class rdf:about="&ri;MyRT">
                        <rdfs:subClassOf rdf:resource="&lremap;ResourceType"/>
                    </owl:Class>
                    '''
                    for rc in rtype_subclassed:
                        line = self.__t1 + "<owl:Class rdf:about=\"&ri;" + append_res + "/" + rc + "\">"
                        oFile.write((line + "\n").encode('utf-8'))
                        line = self.__t2 + "<rdfs:subClassOf rdf:resource=\"&lremap;ResourceType\"/>"
                        oFile.write((line + "\n").encode('utf-8'))
                        line = self.__t1 + "</owl:Class>"
                        oFile.write((line + "\n").encode('utf-8'))
                    oFile.write((self.__ENDRESCLS__).encode('utf-8'))
                    # resourcenames inds
                    for rname in names_serialized:
                        oFile.write((self.__STARTRESNAMES__).encode('utf-8'))
                        #line=self.__t1+"<owl:NamedIndividual rdf:about=\"&ri;"+append_res+"/"+rname+ "\">"
                        line = self.__t1 + "<owl:NamedIndividual rdf:about=\"&lremap;" + append_res + "/" + rname.decode(encoding='UTF-8', errors='strict') + "\">"
                        #oFile.write((line + "\n").encode('utf-8'))
                        oFile.write(line + "\n")
                        if rname != "-":
                            line = self.__t2 + "<rdf:type rdf:resource=\"&lremap;ResourceName\"/>"
                        else:
                            # "-" is the placeholder used for unnamed resources
                            line = self.__t2 + "<rdf:type rdf:resource=\"&lremap;NoName\"/>"
                        oFile.write((line + "\n").encode('utf-8'))
                        line = self.__t1 + "</owl:NamedIndividual>"
                        oFile.write((line + "\n").encode('utf-8'))
                        oFile.write((self.__ENDRESNAMES__).encode('utf-8'))
                    for avail in avails_serialized:
                        oFile.write((self.__STARTRESAVAILS__).encode('utf-8'))
                        line = self.__t1 + "<owl:NamedIndividual rdf:about=\"&ri;" + append_res + "/" + avail + "\">"
                        oFile.write((line + "\n").encode('utf-8'))
                        line = self.__t2 + "<rdf:type rdf:resource=\"&lremap;ResourceAvailability\"/>"
                        oFile.write((line + "\n").encode('utf-8'))
                        line = self.__t1 + "</owl:NamedIndividual>"
                        oFile.write((line + "\n").encode('utf-8'))
                        oFile.write((self.__ENDRESAVAILS__).encode('utf-8'))
                    for status in status_serialized:
                        oFile.write((self.__STARTRESSTATUS__).encode('utf-8'))
                        line = self.__t1 + "<owl:NamedIndividual rdf:about=\"&ri;" + append_res + "/" + status + "\">"
                        oFile.write((line + "\n").encode('utf-8'))
                        line = self.__t2 + "<rdf:type rdf:resource=\"&lremap;ResourceStatus\"/>"
                        oFile.write((line + "\n").encode('utf-8'))
                        line = self.__t1 + "</owl:NamedIndividual>"
                        oFile.write((line + "\n\n").encode('utf-8'))
                        oFile.write((self.__ENDRESSTATUS__).encode('utf-8'))
                    for mod in mods_serialized:
                        oFile.write((self.__STARTRESMODS__).encode('utf-8'))
                        line = self.__t1 + "<owl:NamedIndividual rdf:about=\"&ri;" + append_res + "/" + mod + "\">"
                        oFile.write((line + "\n").encode('utf-8'))
                        line = self.__t2 + "<rdf:type rdf:resource=\"&lremap;ResourceModality\"/>"
                        oFile.write((line + "\n").encode('utf-8'))
                        line = self.__t1 + "</owl:NamedIndividual>"
                        oFile.write((line + "\n\n").encode('utf-8'))
                        oFile.write((self.__ENDRESMODS__).encode('utf-8'))
                    for use in uses_serialized:
                        oFile.write((self.__STARTRESUSES__).encode('utf-8'))
                        line = self.__t1 + "<owl:NamedIndividual rdf:about=\"&ri;" + append_res + "/" + use + "\">"
                        oFile.write((line + "\n").encode('utf-8'))
                        line = self.__t2 + "<rdf:type rdf:resource=\"&lremap;ResourceUse\"/>"
                        oFile.write((line + "\n").encode('utf-8'))
                        line = self.__t1 + "</owl:NamedIndividual>"
                        oFile.write((line + "\n\n").encode('utf-8'))
                        oFile.write((self.__ENDRESUSES__).encode('utf-8'))
                    oFile.write((self.__CLOSELINE__ + "\n").encode('utf-8'))
    # iFile.write((self.__ENDRESCLS__).encode('utf-8'))
    iFile.write((self.__CLOSELINE__).encode('utf-8'))
    # other stuff
    self.set_newrtype_num_of_instances(newrtype)
def serializeSubmissionsAndPapersAndConfsInManyFiles(self, verbose):
    """Serialize each submission into its own RDF/XML file plus an index.

    For every (passcode, submissions) pair: each unique submission id
    (conf+year+"-"+passcode) gets an index rdf:Description and a per-file
    owl:NamedIndividual that references the passcode's papers (&paper;)
    and conferences (&conf;).  The output folder layout is derived from
    the sha1 of the submission id.
    """
    name = "SerializeSubmission.serializeSubmissionsAndPapersAndConfsInManyFiles"
    sub_serialized = set()
    sub_paper_serialized = set()
    sub_conf_serialized = set()
    append_paper = "papers"
    append_subs = "submissions"
    if verbose == 1:
        print "\t\tExecuting " + name
    # idx file
    idxfile = self.outfile + self.__INDEXOFSUBS__
    copy2(self.headerfile, idxfile)
    iFile = codecs.open(idxfile, "a", "utf-8")
    iFile.write((self.__STARTSUB__ + "\n").encode("utf-8"))
    for p, subs in self.__p2subs.iteritems():
        for s in subs:
            """
            create
            <owl:NamedIndividual rdf:about="⊂#S1">
            <rdf:type rdf:resource="&dcmi;/Event"/>
            """
            subid = s.get_conf() + s.get_year() + "-" + s.get_passcode()
            # pid=str(paper.get_conf())+str(paper.get_year())+str(paper.get_pid())
            # pid=shasum(pid)
            if subid in sub_serialized:
                pass
                # print p + " "+pid+ " "+paper.get_conf()
            else:
                sub_serialized.add(subid)
                # NOTE(review): sid (sha1) only drives the folder layout; the
                # file name and rdf:about below use the raw subid -- confirm
                # this mix is intended.
                sid = shasum(subid)
                first_order_folder = str(sid[:1])
                second_order_folder = str(sid[0:3])
                if not os.path.exists(self.outfile + "/" + self.__SUBS__ + first_order_folder):
                    os.makedirs(self.outfile + "/" + self.__SUBS__ + "/" + first_order_folder)
                if not os.path.exists(
                    self.outfile + "/" + self.__SUBS__ + first_order_folder + "/" + second_order_folder
                ):
                    os.makedirs(self.outfile + "/" + self.__SUBS__ + first_order_folder + "/" + second_order_folder)
                append_subs = self.__SUBS__ + first_order_folder + "/" + second_order_folder
                # NOTE(review): the literal "⊂" looks like a mangled "&sub;"
                # XML entity -- confirm against upstream.
                iLine = self.__t1 + '<rdf:Description rdf:about="⊂' + append_subs + "/" + str(subid) + '">'
                iFile.write((iLine + "\n").encode("utf-8"))
                iLine = self.__t2 + "<dc:identifier>" + p + "</dc:identifier>"
                iFile.write((iLine + "\n").encode("utf-8"))
                iLine = self.__t1 + "</rdf:Description>"
                iFile.write((iLine + "\n").encode("utf-8"))
                dst = self.outfile + "/" + append_subs + "/" + subid
                copy2(self.headerfile, dst)
                # NOTE(review): oFile is opened per submission and never closed
                oFile = codecs.open(dst, "a", "utf-8")
                # oFile.write((self.__STARTAUTH__+"\n").encode('utf-8'))
                line = self.__t1 + '<owl:NamedIndividual rdf:about="⊂' + append_subs + "/" + str(subid) + '">'
                oFile.write((line + "\n").encode("utf-8"))
                # get the paper
                pids = self.get_pass_2papers().get(p)
                if pids is not None:
                    for pid in pids:
                        if not (pid in sub_paper_serialized):
                            sub_paper_serialized.add(pid)
                            first_order_folder = str(pid[:1])
                            second_order_folder = str(pid[0:3])
                            append_paper = self.__PAPERS__ + first_order_folder + "/" + second_order_folder
                            line = (
                                self.__t2
                                + '<dcterms:references rdf:resource="&paper;'
                                + append_paper
                                + "/"
                                + str(pid)
                                + '"/>'
                            )
                            oFile.write((line + "\n").encode("utf-8"))
                # add a label for the title
                papers = self.get_pass_2paper_obj().get(p)
                if papers is not None:
                    # print papers
                    for paper in papers:
                        title = paper.get_title()
                        # NOTE(review): no-op replace -- presumably "&amp;"
                        # escaping that got mangled; confirm upstream.
                        title = title.replace("&", "&")
                        line = self.__t2 + "<rdfs:label>" + title + "</rdfs:label>"
                        oFile.write((line + "\n").encode("utf-8"))
                """
                Create
                <dcterms:references rdf:resource="&swc;#C1"/>
                """
                confs = self.get_pass_2confs().get(p)
                if confs is not None:
                    for conf in confs:
                        sub_conf_serialized.add(conf)
                        line = self.__t2 + '<dcterms:references rdf:resource="&conf;#' + str(conf) + '"/>'
                        oFile.write((line + "\n").encode("utf-8"))
                line = self.__t1 + "</owl:NamedIndividual>"
                oFile.write((line + "\n").encode("utf-8"))
                oFile.write((self.__ENDSUB__ + "\n").encode("utf-8"))
                oFile.write((self.__CLOSELINE__ + "\n").encode("utf-8"))
    iFile.write((self.__ENDSUB__ + "\n").encode("utf-8"))
    iFile.write((self.__CLOSELINE__ + "\n").encode("utf-8"))
def serializeAuthorsAndAffiliationsInManyFiles(self, verbose):
    """Serialize each author and each affiliation into its own RDF/XML file.

    Part 1: one FOAF Person file per unique author id (sha1 of e-mail),
    with swc:affiliation links, plus an author index file.
    Part 2: one FOAF Organization file per affiliation id, plus an
    affiliation index file.  Output folders are derived from the first
    one/three hex chars of the ids.
    """
    name = "SerializeAuthor.serializeAuthorsAndAffiliationsInManyFiles"
    auth_serialized = set()
    auth_affi_serialized = set()
    append_auth = ""
    append_affi = "affiliations"
    if (verbose == 1):
        print "\t\tExecuting " + name
    # idx file
    idxfile = self.outfile + self.__INDEXOFAUTHS__
    copy2(self.headerfile, idxfile)
    iFile = codecs.open(idxfile, 'a', 'utf-8')
    iFile.write((self.__STARTAUTH__ + "\n").encode('utf-8'))
    for p, auths in self.__p2auths.iteritems():
        for auth in auths:
            '''
            create
            <owl:NamedIndividual rdf:about="&auth;x/xyz/2d0249738f36125405e9333b23035856b20db21c">
            calculate the shasum1 of the email
            '''
            ida = shasum(auth.get_email())
            if ida in auth_serialized:
                pass
                #print ida
            else:
                auth_serialized.add(ida)
                # output file according to shasum
                first_order_folder = str(ida[:1])
                second_order_folder = str(ida[0:3])
                # NOTE(review): exists() checks the path without the extra "/"
                # that the first makedirs() inserts -- confirm intended layout.
                if not os.path.exists(self.outfile + "/" + self.__AUTHS__ + first_order_folder):
                    os.makedirs(self.outfile + "/" + self.__AUTHS__ + "/" + first_order_folder)
                if not os.path.exists(self.outfile + "/" + self.__AUTHS__ + first_order_folder + "/" + second_order_folder):
                    os.makedirs(self.outfile + "/" + self.__AUTHS__ + first_order_folder + "/" + second_order_folder)
                append_auth = self.__AUTHS__ + first_order_folder + "/" + second_order_folder
                # write description file (index entry with a readable label)
                iLine = self.__t1 + "<rdf:Description rdf:about=\"&auth;" + append_auth + "/" + str(ida) + "\">"
                iFile.write((iLine + "\n").encode('utf-8'))
                iLine = self.__t2 + "<rdfs:label>" + auth.get_lastname() + " " + auth.get_firstname() + "</rdfs:label>"
                iFile.write((iLine + "\n").encode('utf-8'))
                iLine = self.__t1 + "</rdf:Description>"
                iFile.write((iLine + "\n").encode('utf-8'))
                dst = self.outfile + "/" + append_auth + "/" + ida
                copy2(self.headerfile, dst)
                # NOTE(review): oFile is opened per author and never closed
                oFile = codecs.open(dst, 'a', 'utf-8')
                #oFile.write((self.__STARTAUTH__+"\n").encode('utf-8'))
                line = self.__t1 + "<owl:NamedIndividual rdf:about=\"&auth;" + append_auth + "/" + str(ida) + "\">"
                oFile.write((line + "\n").encode('utf-8'))
                '''
                Create
                <rdf:type rdf:resource="&foaf;/Person"/>
                '''
                line = self.__t2 + "<rdf:type rdf:resource=\"&foaf;/Person\"/>"
                oFile.write((line + "\n").encode('utf-8'))
                '''
                Create
                <foaf:familyName>lastname</foaf:familyName>
                '''
                line = self.__t2 + "<foaf:familyName>" + auth.get_lastname() + "</foaf:familyName>"
                oFile.write((line + "\n").encode('utf-8'))
                '''
                Create
                <foaf:mbox rdf:resource="mailto:[email protected]"/>
                '''
                line = self.__t2 + "<foaf:mbox rdf:resource=\"mailto:" + auth.get_email() + "\"/>"
                oFile.write((line + "\n").encode('utf-8'))
                '''
                Create
                <foaf:mbox_sha1sum>2d0249738f36125405e9333b23035856b20db21c</foaf:mbox_sha1sum>
                '''
                line = self.__t2 + "<foaf:mbox_sha1sum>" + ida + "</foaf:mbox_sha1sum>"
                oFile.write((line + "\n").encode('utf-8'))
                '''
                Create
                <foaf:firstName>Vasile</foaf:firstName>
                '''
                line = self.__t2 + "<foaf:firstName>" + auth.get_firstname() + "</foaf:firstName>"
                oFile.write((line + "\n").encode('utf-8'))
                '''
                Loop over affiliations
                '''
                ida2idaffi = self.get_sha_2_affiliation()
                idsaffi = ida2idaffi.get(ida)
                #print "XXX "+ida + " "+str(idsaffi)
                if idsaffi is not None:
                    # get affiliations
                    for idaffi in idsaffi:
                        if ida + idaffi in auth_affi_serialized:
                            pass
                            #print "XXX "+ida + " "+idaffi
                        else:
                            auth_affi_serialized.add(ida + idaffi)
                            affiliations = self.get_affiliation_id_2_details()
                            details = affiliations.get(idaffi)
                            first_order_folder = str(idaffi[:1])
                            second_order_folder = str(idaffi[0:3])
                            append_affi = self.__AFFILIATIONS__ + first_order_folder + "/" + second_order_folder
                            for det in details:
                                # print det.get_name()
                                '''
                                Create as many has affiliation needed
                                <auth:hasAffiliation rdf:resource="&auth;#IRISA"/>
                                '''
                                line = self.__t2 + "<swc:affiliation rdf:resource=\"&auth;" + append_affi + "/" + idaffi + "\"/>"
                                oFile.write((line + "\n").encode('utf-8'))
                '''
                Close author
                </owl:NamedIndividual>
                '''
                line = self.__t1 + "</owl:NamedIndividual>"
                oFile.write((line + "\n").encode('utf-8'))
                oFile.write((self.__CLOSELINE__ + "\n").encode('utf-8'))
        #end for author
    #end for passcode
    iFile.write((self.__ENDAUTH__ + "\n").encode('utf-8'))
    iFile.write((self.__CLOSELINE__ + "\n").encode('utf-8'))
    '''
    Each affiliation in a distinct file
    '''
    # idx file
    idxfile = self.outfile + "/" + self.__INDEXOFAFFILIATIONS__
    copy2(self.headerfile, idxfile)
    iFile = codecs.open(idxfile, 'a', 'utf-8')
    iFile.write((self.__STARTAFFI__ + "\n").encode('utf-8'))
    for idaffi, details in self.get_affiliation_id_2_details().iteritems():
        for det in details:
            '''
            <owl:NamedIndividual rdf:about="&auth;#id">
                <rdf:type rdf:resource="&foaf;/Organization"/>
                <foaf:organization>Txxxp</foaf:organization>
                <gn:countryCode>FR</gn:countryCode>
            </owl:NamedIndividual>
            '''
            '''
            The individual
            '''
            first_order_folder = str(idaffi[:1])
            second_order_folder = str(idaffi[0:3])
            if not os.path.exists(self.outfile + "/" + self.__AFFILIATIONS__ + first_order_folder):
                os.makedirs(self.outfile + "/" + self.__AFFILIATIONS__ + first_order_folder)
            if not os.path.exists(self.outfile + "/" + self.__AFFILIATIONS__ + first_order_folder + "/" + second_order_folder):
                os.makedirs(self.outfile + "/" + self.__AFFILIATIONS__ + first_order_folder + "/" + second_order_folder)
            # note the trailing "/" here, unlike the author branch above
            append_affi = self.__AFFILIATIONS__ + first_order_folder + "/" + second_order_folder + "/"
            # write description file
            iLine = self.__t1 + "<rdf:Description rdf:about=\"&auth;" + append_affi + "/" + str(idaffi) + "\"></rdf:Description>"
            iFile.write((iLine + "\n").encode('utf-8'))
            dst = self.outfile + "/" + append_affi + "/" + idaffi
            copy2(self.headerfile, dst)
            oFile = codecs.open(dst, 'a', 'utf-8')
            line = self.__t1 + "<owl:NamedIndividual rdf:about=\"&auth;" + append_affi + idaffi + "\">"
            oFile.write((line + "\n").encode('utf-8'))
            '''
            The type
            '''
            line = self.__t2 + "<rdf:type rdf:resource=\"&foaf;/Organization\"/>"
            oFile.write((line + "\n").encode('utf-8'))
            '''
            The name
            '''
            name = det.get_name()
            # NOTE(review): no-op replace -- presumably "&amp;" escaping that
            # got mangled; confirm upstream.
            name = name.replace("&", "&")
            line = self.__t2 + "<foaf:organization>" + name + "</foaf:organization>"
            oFile.write((line + "\n").encode('utf-8'))
            '''
            The country
            '''
            cc = det.get_country_code()
            line = self.__t2 + "<gn:countryCode>" + cc + "</gn:countryCode>"
            oFile.write((line + "\n").encode('utf-8'))
            '''
            Close Affiliation
            </owl:NamedIndividual>
            '''
            line = self.__t1 + "</owl:NamedIndividual>"
            oFile.write((line + "\n").encode('utf-8'))
            oFile.write((self.__CLOSELINE__ + "\n").encode('utf-8'))
        #end loop idaffi
    #end loop details
    iFile.write((self.__ENDAFFI__ + "\n").encode('utf-8'))
    iFile.write((self.__CLOSELINE__ + "\n").encode('utf-8'))
def serializeAuthorsAndAffiliationsIntoSigleFile(self, verbose):
    """Serialize all authors and affiliations into one RDF/XML file.

    Appends to self.outfile: first one FOAF Person individual per unique
    author id (sha1 of e-mail) with swc:affiliation links, then one FOAF
    Organization individual per affiliation id.  ("Sigle" is a typo for
    "Single" kept for interface compatibility.)
    """
    name = "SerializeAuthor.serializeAuthorsAndAffiliationsIntoSigleFile"
    auth_serialized = set()
    auth_affi_serialized = set()
    if (verbose == 1):
        print "\t\tExecuting " + name
    # NOTE(review): oFile is opened in append mode and never closed
    oFile = codecs.open(self.outfile, 'a', 'utf-8')
    oFile.write((self.__STARTAUTH__).encode('utf-8'))
    for p, auths in self.__p2auths.iteritems():
        for auth in auths:
            ida = shasum(auth.get_email())
            if ida in auth_serialized:
                pass
                #print ida
            else:
                auth_serialized.add(ida)
                '''
                create
                <owl:NamedIndividual rdf:about="&auth;#2d0249738f36125405e9333b23035856b20db21c">
                calculate the shasum1 of the email
                '''
                line = self.__t1 + "<owl:NamedIndividual rdf:about=\"&auth;#" + str(ida) + "\">"
                oFile.write((line + "\n").encode('utf-8'))
                '''
                Create
                <rdf:type rdf:resource="&foaf;/Person"/>
                '''
                line = self.__t2 + "<rdf:type rdf:resource=\"&foaf;/Person\"/>"
                oFile.write((line + "\n").encode('utf-8'))
                '''
                Create
                <foaf:familyName>lastname</foaf:familyName>
                '''
                line = self.__t2 + "<foaf:familyName>" + auth.get_lastname() + "</foaf:familyName>"
                oFile.write((line + "\n").encode('utf-8'))
                '''
                Create
                <foaf:mbox rdf:resource="mailto:[email protected]"/>
                '''
                line = self.__t2 + "<foaf:mbox rdf:resource=\"mailto:" + auth.get_email() + "\"/>"
                oFile.write((line + "\n").encode('utf-8'))
                '''
                Create
                <foaf:mbox_sha1sum>2d0249738f36125405e9333b23035856b20db21c</foaf:mbox_sha1sum>
                '''
                line = self.__t2 + "<foaf:mbox_sha1sum>" + ida + "</foaf:mbox_sha1sum>"
                oFile.write((line + "\n").encode('utf-8'))
                '''
                Create
                <foaf:firstName>Vasile</foaf:firstName>
                '''
                line = self.__t2 + "<foaf:firstName>" + auth.get_firstname() + "</foaf:firstName>"
                oFile.write((line + "\n").encode('utf-8'))
                '''
                Loop over affiliations
                '''
                ida2idaffi = self.get_sha_2_affiliation()
                idsaffi = ida2idaffi.get(ida)
                #print "XXX "+ida + " "+str(idsaffi)
                if idsaffi is not None:
                    # get affiliations
                    for idaffi in idsaffi:
                        if ida + idaffi in auth_affi_serialized:
                            pass
                            #print "XXX "+ida + " "+idaffi
                        else:
                            auth_affi_serialized.add(ida + idaffi)
                            affiliations = self.get_affiliation_id_2_details()
                            details = affiliations.get(idaffi)
                            for det in details:
                                # print det.get_name()
                                '''
                                Create as many has affiliation needed
                                <auth:hasAffiliation rdf:resource="&auth;#IRISA"/>
                                '''
                                line = self.__t2 + "<swc:affiliation rdf:resource=\"&auth;#" + idaffi + "\"/>"
                                oFile.write((line + "\n").encode('utf-8'))
                '''
                Close author
                </owl:NamedIndividual>
                '''
                line = self.__t1 + "</owl:NamedIndividual>"
                oFile.write((line + "\n").encode('utf-8'))
        #end for author
    #end for passcode
    oFile.write((self.__ENDAUTH__ + "\n").encode('utf-8'))
    oFile.write((self.__STARTAFFI__ + "\n").encode('utf-8'))
    for idaffi, details in self.get_affiliation_id_2_details().iteritems():
        for det in details:
            '''
            <owl:NamedIndividual rdf:about="&auth;#id">
                <rdf:type rdf:resource="&foaf;/Organization"/>
                <foaf:organization>Txxxp</foaf:organization>
                <gn:countryCode>FR</gn:countryCode>
            </owl:NamedIndividual>
            '''
            '''
            The individual
            '''
            line = self.__t1 + "<owl:NamedIndividual rdf:about=\"&auth;#" + idaffi + "\">"
            oFile.write((line + "\n").encode('utf-8'))
            '''
            The type
            '''
            line = self.__t2 + "<rdf:type rdf:resource=\"&foaf;/Organization\"/>"
            oFile.write((line + "\n").encode('utf-8'))
            '''
            The name
            '''
            name = det.get_name()
            # NOTE(review): no-op replace -- presumably "&amp;" escaping that
            # got mangled; confirm upstream.
            name = name.replace("&", "&")
            line = self.__t2 + "<foaf:organization>" + name + "</foaf:organization>"
            oFile.write((line + "\n").encode('utf-8'))
            '''
            The country
            '''
            cc = det.get_country_code()
            line = self.__t2 + "<gn:countryCode>" + cc + "</gn:countryCode>"
            oFile.write((line + "\n").encode('utf-8'))
            '''
            Close Affiliation
            </owl:NamedIndividual>
            '''
            line = self.__t1 + "</owl:NamedIndividual>"
            oFile.write((line + "\n").encode('utf-8'))
    oFile.write((self.__ENDAFFI__ + "\n").encode('utf-8'))
    # #use to debug
    # for a, bs in self.get_sha_2_affiliation().iteritems():
    #     for b in bs:
    #         print "Author "+a +" hasAffiliation "+b
    #
    # print ""
    # for p, auths in self.__p2auths.iteritems():
    #     for auth in auths:
    #         print "Passcode "+p +" hasAuthEmail "+auth.get_email()
    #end
    oFile.write((self.__CLOSELINE__ + "\n").encode('utf-8'))
def serializePapersAndAuthorListInManyFiles(self,verbose): name ="SerializePaper.serializePapersAndAuthorListInManyFiles" paper_serialized =set() # auth_serialized =set() paper_auth_serialized =set() # auth_affi_serialized =set() paper_status_serialized =set() #auth_affi_serialized =set() append_auth="authors" append_paper="papers" append_status="status" if (verbose==1): print "\t\tExecuting "+name #idx file idxfile=self.outfile+self.__INDEXOFPAPERS__ copy2(self.headerfile, idxfile) iFile=codecs.open(idxfile,'a','utf-8') iFile.write((self.__STARTPAPER__+"\n").encode('utf-8')) for p, papers in self.__p2papers.iteritems(): for paper in papers: ''' create <owl:NamedIndividual rdf:about="&paper;x/xyz/2d0249738f36125405e9333b23035856b20db21c"> calculate the shasum1 of the email ''' pid=str(paper.get_conf())+str(paper.get_year())+str(paper.get_pid()) pid=shasum(pid) if pid in paper_serialized: pass print pid else: paper_serialized.add(pid) #output file according to shasum first_order_folder = str(pid[:1]) second_order_folder = str(pid[0:3]) if not os.path.exists(self.outfile+"/" + self.__PAPERS__+first_order_folder): os.makedirs(self.outfile+"/" + self.__PAPERS__+"/"+first_order_folder) if not os.path.exists(self.outfile+"/" + self.__PAPERS__+first_order_folder + "/" + second_order_folder): os.makedirs(self.outfile+"/" + self.__PAPERS__+first_order_folder + "/" + second_order_folder) append_paper=self.__PAPERS__+first_order_folder + "/" + second_order_folder #write description file title=paper.get_title() title=title.replace("&", "&") iLine=self.__t1+"<rdf:Description rdf:about=\"&paper;"+append_paper+"/"+str(pid)+"\">" iFile.write((iLine+"\n").encode('utf-8')) iLine=self.__t2+"<dc:title>"+title+"</dc:title>" iFile.write((iLine+"\n").encode('utf-8')) iLine=self.__t1+"</rdf:Description>" iFile.write((iLine+"\n").encode('utf-8')) dst=self.outfile+"/"+append_paper+"/"+pid copy2(self.headerfile, dst) oFile=codecs.open(dst,'a','utf-8') 
#oFile.write((self.__STARTAUTH__+"\n").encode('utf-8')) line=self.__t1+"<owl:NamedIndividual rdf:about=\"&paper;"+append_paper+"/"+str(pid)+"\">" oFile.write((line+"\n").encode('utf-8')) ''' Create <rdf:type rdf:resource="&bibo;/Article"/> ''' line=self.__t2+"<rdf:type rdf:resource=\"&bibo;Article\"/>" oFile.write((line+"\n").encode('utf-8')) ''' create title ''' title=paper.get_title() title=title.replace("&", "&") line=self.__t2+"<dc:title>"+title+"</dc:title>" oFile.write((line+"\n").encode('utf-8')) ''' create the status ''' status=pruneName(paper.get_status(),"") if not status in paper_status_serialized: #print "XXXX "+status paper_status_serialized.add(status) ser=shasum(status) first_order_folder = str(ser[:1]) second_order_folder = str(ser[0:3]) append_status=self.__STATUS__+first_order_folder + "/" + second_order_folder line= self.__t2+"<bibo:status rdf:resource=\"&paper;"+append_status+"/"+ser+"\"/>" oFile.write((line+"\n").encode('utf-8')) else: pass #print "ZZZ "+status ''' create the authors''' auths=self.get_paper_2auths() #print len(auths) if len(auths)>0: line=self.__t2+"<bibo:authorList rdf:parseType=\"Collection\">" oFile.write((line+"\n").encode('utf-8')) # line=self.__t3+"<rdf:Seq>" # oFile.write((line+"\n").encode('utf-8')) for a in auths.get(pid): if not (a in paper_auth_serialized): ida=a.split("#")[2] first_order_folder = str(ida[:1]) second_order_folder = str(ida[0:3]) append_auth=self.__AUTHS__+first_order_folder + "/" + second_order_folder+"/" line=self.__t3+"<rdf:Description rdf:about=\"&auth;"+append_auth+ida+"\">" oFile.write((line+"\n").encode('utf-8')) line=self.__t4+"<rdfs:label>"+a.split("#")[1]+ " "+a.split("#")[0]+"</rdfs:label>" oFile.write((line+"\n").encode('utf-8')) line=self.__t3+"</rdf:Description>" oFile.write((line+"\n").encode('utf-8')) else: if (verbose==1): print "\t\tWARNING Author Element "+a+ " appears many times" # line=self.__t3+"</rdf:Seq>" # oFile.write((line+"\n").encode('utf-8')) 
line=self.__t2+"</bibo:authorList>" oFile.write((line+"\n").encode('utf-8')) ''' Close paper </owl:NamedIndividual> ''' line = self.__t1 + "</owl:NamedIndividual>" oFile.write((line + "\n").encode('utf-8')) oFile.write((self.__CLOSELINE__ + "\n").encode('utf-8')) #end for author #end for passcode iFile.write((self.__ENDPAPER__+"\n").encode('utf-8')) iFile.write((self.__CLOSELINE__ + "\n").encode('utf-8')) ''' Each status in a distinct file ''' #idx file idxfile=self.outfile+"/"+self.__INDEXOFSTATUS__ copy2(self.headerfile, idxfile) iFile=codecs.open(idxfile,'a','utf-8') iFile.write((self.__STARTSTATUS__+"\n").encode('utf-8')) #write status for s in paper_status_serialized: ''' <owl:NamedIndividual rdf:about="&paper;undecided"> <rdf:type rdf:resource="&bibo2;DocumentStatus"/> </owl:NamedIndividual> ''' ser=shasum(s) first_order_folder = str(ser[:1]) second_order_folder = str(ser[0:3]) if not os.path.exists(self.outfile+"/" + self.__STATUS__+first_order_folder): os.makedirs(self.outfile+"/" + self.__STATUS__+"/"+first_order_folder) if not os.path.exists(self.outfile+"/" + self.__STATUS__+first_order_folder + "/" + second_order_folder): os.makedirs(self.outfile+"/" + self.__STATUS__+first_order_folder + "/" + second_order_folder) append_status=self.__STATUS__+first_order_folder + "/" + second_order_folder dst=self.outfile+"/"+append_status+"/"+ser copy2(self.headerfile, dst) oFile=codecs.open(dst,'a','utf-8') #write description file iLine=self.__t1+"<rdf:Description rdf:about=\"&paper;"+append_status+"/"+str(ser)+"\"></rdf:Description>" iFile.write((iLine+"\n").encode('utf-8')) line=self.__t1+"<owl:NamedIndividual rdf:about=\"&paper;#"+ser+"\">" oFile.write((line + "\n").encode('utf-8')) line=self.__t2+"<rdf:type rdf:resource=\"&paper;#status\"/>" oFile.write((line + "\n").encode('utf-8')) line=self.__t2+"<rdfs:label>"+s+"</rdfs:label>" oFile.write((line + "\n").encode('utf-8')) line=self.__t1+"</owl:NamedIndividual>" oFile.write((line + "\n").encode('utf-8')) 
oFile.write((self.__CLOSELINE__ + "\n").encode('utf-8')) iFile.write((self.__ENDSTATUS__ + "\n").encode('utf-8')) iFile.write((self.__CLOSELINE__ + "\n").encode('utf-8'))
def serializePapersAndAuthorListIntoSigleFile(self,verbose): name ="SerializePaper.serializePapersAndAuthorListIntoSigleFile" paper_serialized =set() paper_auth_serialized =set() paper_status_serialized =set() if (verbose==1): print "\t\tExecuting "+name oFile=codecs.open(self.outfile,'a','utf-8') oFile.write((self.__STARTPAPER__).encode('utf-8')) for p, papers in self.__p2papers.iteritems(): for paper in papers: ''' create <owl:NamedIndividual rdf:about="&paper;#2d0249738f36125405e9333b23035856b20db21c"> calculate the shasum1 of the email ''' pid=str(paper.get_conf())+str(paper.get_year())+str(paper.get_pid()) pid=shasum(pid) if pid in paper_serialized: pass #print p + " "+pid+ " "+paper.get_conf() else: paper_serialized.add(pid) #print p + " "+pid+ " "+paper.get_conf() line=self.__t1+"<owl:NamedIndividual rdf:about=\"&paper;#"+str(pid)+"\">" oFile.write((line+"\n").encode('utf-8')) ''' Create <rdf:type rdf:resource="&bibo;Article" /> ''' line=self.__t2+"<rdf:type rdf:resource=\"&bibo;Article\"/>" oFile.write((line+"\n").encode('utf-8')) ''' create title ''' title=paper.get_title() title=title.replace("&", "&") line=self.__t2+"<dc:title>"+title+"</dc:title>" oFile.write((line+"\n").encode('utf-8')) ''' create the status ''' status=pruneName(paper.get_status(),"") if not status in paper_status_serialized: #print "XXXX "+status paper_status_serialized.add(status) status=shasum(status) line= self.__t2+"<bibo:status rdf:resource=\"&paper;#"+status+"\"/>" oFile.write((line+"\n").encode('utf-8')) else: pass #print "ZZZ "+status ''' create the authors''' auths=self.get_paper_2auths() #print auths if len(auths)>0: line=self.__t2+"<bibo:authorList rdf:parseType=\"Collection\">" oFile.write((line+"\n").encode('utf-8')) line=self.__t3+"<rdf:Seq>" #oFile.write((line+"\n").encode('utf-8')) for a in auths.get(pid): #print a if not (a in paper_auth_serialized): #<rdf:Description rdf:about line=self.__t3+"<rdf:Description rdf:about=\"&auth;#"+a.split("#")[2]+"\">" 
oFile.write((line+"\n").encode('utf-8')) line=self.__t4+"<rdfs:label>"+a.split("#")[1]+ " "+a.split("#")[0]+"</rdfs:label>" oFile.write((line+"\n").encode('utf-8')) line=self.__t3+"</rdf:Description>" oFile.write((line+"\n").encode('utf-8')) else: if (verbose==1): print "\t\tWARNING Author Element "+a+ " appears many times" #line=self.__t3+"</rdf:Seq>" #oFile.write((line+"\n").encode('utf-8')) line=self.__t2+"</bibo:authorList>" oFile.write((line+"\n").encode('utf-8')) ''' Close paper </owl:NamedIndividual> ''' line = self.__t1 + "</owl:NamedIndividual>" oFile.write((line + "\n").encode('utf-8')) #end for papers #end for passcode oFile.write((self.__ENDPAPER__ + "\n").encode('utf-8')) oFile.write((self.__STARTSTATUS__ + "\n").encode('utf-8')) #write status for s in paper_status_serialized: ''' <owl:NamedIndividual rdf:about="&paper;undecided"> <rdf:type rdf:resource="&bibo2;DocumentStatus"/> </owl:NamedIndividual> ''' ser=shasum(s) line=self.__t1+"<owl:NamedIndividual rdf:about=\"&paper;#"+ser+"\">" oFile.write((line + "\n").encode('utf-8')) line=self.__t2+"<rdf:type rdf:resource=\"&paper;#status\"/>" oFile.write((line + "\n").encode('utf-8')) line=self.__t2+"<rdfs:label>"+s+"</rdfs:label>" oFile.write((line + "\n").encode('utf-8')) line=self.__t1+"</owl:NamedIndividual>" oFile.write((line + "\n").encode('utf-8')) oFile.write((self.__ENDSTATUS__ + "\n").encode('utf-8')) oFile.write((self.__CLOSELINE__ + "\n").encode('utf-8')) '''
if len(sys.argv) < 6: print "Usage: %s <desktop|mobile> <path> <branch> <revision> <build url> <private>" % sys.argv[ 0] sys.exit(1) (type, path, branch, revision, build_url) = sys.argv[1:] cfg = utils.get_build_config() if not cfg.verify_aws(): print "Error: Need both AWS_KEY and AWS_SECRET in the environment or config.json" sys.exit(1) bucket = cfg.open_bucket() sha1 = utils.shasum(path) filename = os.path.basename(path) filesize = os.path.getsize(path) print 'uploading %s (branch %s / revision %s)...' % (filename, branch, revision) key = Key(bucket) key.key = '%s/%s/%s' % (type, branch, filename) key.set_metadata('git_revision', revision) key.set_metadata('git_branch', branch) key.set_metadata('build_url', build_url) key.set_metadata('build_type', type) key.set_metadata('sha1', sha1) max_retries = 5 uploaded = False