Esempio n. 1
0
    def _bind_type(self, obj):
        """
        Link an already stored entity to its row in the type enum table
        (publication_type, organization_type, event_type etc.).

        The enum table is taken from obj['type']._table_name and searched for
        the name held in obj['type']['type']; the id found there is written to
        the "type_id" column of the entity's own row.

        @param obj: Entity to bind; it must already carry a database id.
        @returns False if the type name is not present in the enum table,
                 otherwise True exactly when the update reports one row.
        @raises DatabaseError: if obj['id'] is None (insert the entity first).
        @raises RRSDatabaseEntityError: if the entity has no 'type' to bind.
        """
        if obj['id'] is None:
            raise DatabaseError("Object has to contain attribute ID. Insert it into db first.")
        if 'type' not in obj:
            raise RRSDatabaseEntityError("Entity doesn't have a type to bind to.")

        type_entity = obj['type']
        wanted_name = type_entity['type']

        # Step 1: find the id of the enum row with the requested name.
        query = FluentSQLQuery()
        query.select("id").from_table(type_entity._table_name).where("type=", wanted_name)
        query()
        enum_row = query.fetch_one()
        if enum_row is None:
            return False
        enum_id = enum_row['id']

        # Step 2: reuse the same query object to store the reference.
        query.cleanup()
        query.update(obj._table_name, {"type_id": enum_id}).where("id=", obj['id'])
        query()
        return query.count() == 1
    def _bind_type(self, obj):
        """
        Assign the database enum type to an entity that declares one.

        Looks up the entity's type name in the enum table named by
        obj["type"]._table_name and stores the matching id in the entity's
        "type_id" column.

        @param obj: Entity that was already inserted (its "id" must be set).
        @returns True when the update reports exactly one row; False when the
                 type name does not exist in the enum table.
        @raises DatabaseError: when the entity has no id yet.
        @raises RRSDatabaseEntityError: when the entity has no "type".
        """
        entity_id = obj["id"]
        if entity_id is None:
            raise DatabaseError("Object has to contain attribute ID. Insert it into db first.")
        if "type" not in obj:
            raise RRSDatabaseEntityError("Entity doesn't have a type to bind to.")

        declared_type = obj["type"]
        type_name = declared_type["type"]
        sql = FluentSQLQuery()

        # Resolve the enum id for the declared type name.
        sql.select("id").from_table(declared_type._table_name).where("type=", type_name)
        sql()
        found = sql.fetch_one()
        if found is None:
            return False
        type_id = found["id"]

        # Clear the builder before issuing the update on the entity table.
        sql.cleanup()
        sql.update(obj._table_name, {"type_id": type_id}).where("id=", entity_id)
        sql()
        updated_rows = sql.count()
        return updated_rows == 1
Esempio n. 3
0
def checkIfImport(key):
	"""
	Look up a publication in the database by its researchr key.

	@type key: string
	@param key: Researchr key of the publication.
	@return: Whatever fetch_one() yields for the "id" column of the matching
	         "publication" row (presumably None when nothing matches - TODO confirm).
	"""
	lookup = FluentSQLQuery()
	lookup.select("id").from_table("publication").where("researchr_key=", key)
	lookup()
	return lookup.fetch_one()
Esempio n. 4
0
 def _bind_location_to_woeid(self, location):
     """
     Make sure the location carries a woeid number.

     When 'woeid' is missing, the address, city, country and name of the
     location are tried in that order against the "name" column of the
     geoplanet table; the first hit is stored in location['woeid'].

     @param location: RRSLocation instance (exact type, subclasses are refused).
     @returns True if the location already had a woeid or one was found,
              False otherwise
     @raises TypeError: if location is not of type RRSLocation
     """
     if type(location) is not RRSLocation:
         raise TypeError("location has to be type RRSLocation")
     if 'woeid' in location:
         return True

     query = FluentSQLQuery()
     for field in ("address", "city", "country", "name"):
         candidate = location[field]
         if candidate is None:
             continue
         # The builder is shared across attempts, so reset it each time.
         query.cleanup()
         query.select("woeid").from_table("geoplanet").where("name=", candidate)
         query()
         hit = query.fetch_one()
         if hit is not None:
             location['woeid'] = hit[0]
             return True
     return False
 def _bind_location_to_woeid(self, location):
     """
     Resolve a woeid for the given location if it does not have one yet.

     Each of the location's address, city, country and name (in this order,
     skipping unset ones) is matched against geoplanet.name; the woeid of the
     first match is written into location["woeid"].

     @param location: The location to bind; must be exactly an RRSLocation.
     @returns True if a woeid is present after the call, False if none of the
              attributes matched
     @raises TypeError: when location is of any other type
     """
     if type(location) is not RRSLocation:
         raise TypeError("location has to be type RRSLocation")

     already_bound = "woeid" in location
     if already_bound:
         return True

     search_order = ("address", "city", "country", "name")
     geo_query = FluentSQLQuery()
     for attribute in search_order:
         place_name = location[attribute]
         if place_name is None:
             continue
         geo_query.cleanup()
         geo_query.select("woeid").from_table("geoplanet").where("name=", place_name)
         geo_query()
         row = geo_query.fetch_one()
         if row is None:
             continue
         location["woeid"] = row[0]
         return True
     return False
Esempio n. 6
0
class ResearchrPublicationFeeder:
	"""
	Loads one publication from the researchr API, converts it to an
	RRSPublication (creating referenced persons, urls, publisher and series
	in the database when they are missing) and imports it.
	"""

	def __init__(self, config, importer_kwargs):
		"""
		Create the helper objects used while feeding publications.

		@param config: Accepted for interface compatibility; not used here.
		@param importer_kwargs: Settings handed over to RRSXMLImporter.
		"""
		# data obtained from the API
		self.rPublication = None

		# RRSPublication object which is filled in and then imported into the db
		self.publication = None

		# settings for the importer
		self.importer_kwargs = importer_kwargs

		# sleeper range (seconds) used after each API request
		self.LimitMin = 0.1
		self.LimitMax = 0.2

		# object for building SQL queries
		self.q = FluentSQLQuery()

		# researchr API
		self.researchrClass = ResearchrClass()

		# top layer, used for loading objects by id
		self.rrsdb = RRSDatabase()

		# normalizer
		self.norm = Normalize()

		# importer
		self.importer = RRSXMLImporter(self.importer_kwargs)

	@staticmethod
	def __HasValue(value):
		"""
		Tell whether an API field carries a value.

		@param value: Field value as delivered by the API.
		@rtype:   bool
		@return:  False for None and for the empty string, True otherwise.
		"""
		return value is not None and value != ""

	@staticmethod
	def __ExtractDoi(doi):
		"""
		Return the bare DOI from a "http://dx.doi.org/..." link.

		The text after the first occurrence of the resolver prefix is
		returned unchanged. (str.strip() must not be used for this: it
		removes a set of characters from both ends, not a prefix.)

		@type  doi: string
		@param doi: rPublication.doi value, may be None.
		@return:  The DOI, or None when doi is None or lacks the prefix.
		"""
		prefix = 'http://dx.doi.org/'
		if doi is None or prefix not in doi:
			return None
		return doi[doi.index(prefix) + len(prefix):]

	def __GetOrImportId(self, table, where, value, buildModel):
		"""
		Return the id of an entry, importing the entry first if it is missing.

		NOTE: like the loops this helper replaces, it keeps retrying until
		the lookup succeeds, so it does not terminate if the import never
		makes the entry findable.

		@type  table: string
		@param table: Name of table.
		@type  where: string
		@param where: Column condition passed to __GetId (e.g. "title=").
		@param value: Value the column has to be equal to.
		@param buildModel: Callable without arguments returning the model to
		                   import when the entry does not exist.
		@return:  Id of the existing or newly imported entry.
		"""
		_id = self.__GetId(table, where, value)
		while _id is None:
			self.importer.import_model(buildModel())
			_id = self.__GetId(table, where, value)
		return _id

	def __FillType(self):
		"""
		Transform rPublication.type to publication.type
		(left unset when the type is not known to the database).
		"""
		_id = self.__GetId("publication_type", "type=", self.rPublication.publication_type)
		if _id is not None:
			self.publication["type"] = self.rrsdb.load("publication_type", _id)

	def __FillSeries(self):
		"""
		Add rPublication.series to publication_series table (if it is not
		there yet) and bind it to the publication.
		"""
		title = self.rPublication.series
		if not self.__HasValue(title):
			return
		_id = self.__GetOrImportId("publication_series", "title=", title,
			lambda: RRSPublication_series(title=self.rPublication.series))
		self.publication["series"] = self.rrsdb.load("publication_series", _id)

	def __GetId(self, _from, where, _is):
		"""
		Try to find ID in table and return it

		@type  _from: string
		@param _from: Name of table.
		@type  where: string
		@param where: Name of column including the operator (e.g. "title=").
		@type  _is: string
		@param _is: What it is equal.
		@rtype:   int
		@return:  Id of selected entry, None if there is no such entry.
		"""
		try:
			self.q.select("id").from_table(_from)
			self.q.where(where, _is)
			self.q()
			data = self.q.fetch_one()
		finally:
			# self.q is shared by all lookups; reset it even when the query
			# fails so that the next lookup starts from a clean builder.
			self.q.cleanup()
		if data is not None:
			return data[0]
		return None

	def __FillPublisher(self):
		"""
		Add rPublication.publisher to organization table (if it is not there
		yet) and bind it to the publication as its publisher.
		"""
		publisher = self.rPublication.publisher
		if not self.__HasValue(publisher):
			return
		normalized_title = self.norm.organization(publisher)
		_id = self.__GetOrImportId("organization", "title_normalized=", normalized_title,
			lambda: RRSOrganization(title=self.rPublication.publisher,
				title_normalized=normalized_title))
		self.publication["publisher"] = self.rrsdb.load("organization", _id)

	def __FillAuthors(self, authorData, isEditor):
		"""
		Add persons to db (if they are not there yet) and bind them to the
		actual publication. From each entry only the person's url and
		fullname are taken.

		@type  authorData: list
		@param authorData: List of authors data (person, alias)
		@type  isEditor: bool
		@param isEditor: True if authors are editors of this publication.
		"""
		# rank: first author gets 1, second 2 and so on
		for rank, author in enumerate(authorData, 1):
			# NOTE(review): this tests for an 'author' key but then reads
			# author["person"]; confirm against the API payload whether the
			# test was meant to be for 'person'. Behaviour is kept as it was.
			if 'author' in author:
				rFullname = author["person"]["fullname"]
				rUrl = author["person"]["url"]
			else:
				rFullname = author["alias"]["name"]
				rUrl = author["alias"]["url"]
			personUrl = RRSRelationshipPersonUrl()
			self.__FillUrl(personUrl, rUrl)
			self.__FillPerson(personUrl, rFullname, rank, isEditor)

	def __FillUrl(self, personUrl, rUrl):
		"""
		This function adds url to db (if it is not there yet) and sets it as
		the entity of the person-url relationship.

		@type  personUrl: RRSRelationshipPersonUrl
		@param personUrl: Relationship object to add url into it.
		@type  rUrl: string
		@param rUrl: rPublication.(person/alias) url, url of author/editor.
		"""
		def buildUrl():
			url = RRSUrl(link=rUrl)
			url["type"] = self.rrsdb.load("url_type", "1")
			return url

		_id = self.__GetOrImportId("url", "link=", rUrl, buildUrl)
		personUrl.set_entity(self.rrsdb.load("url", _id))

	def __FillPerson(self, personUrl, rFullname, rank, isEditor):
		"""
		This function adds person to db (if there is no person with this full
		name yet) and binds the person to the actual publication.

		@type  personUrl: RRSRelationshipPersonUrl
		@param personUrl: Relationship object to bind to person["url"].
		@type  rFullname: string
		@param rFullname: Fullname of author.
		@type  rank: int
		@param rank: Rank of author, first author get 1, second 2 and so on.
		@type  isEditor: bool
		@param isEditor: True if person is editor of this publication.
		"""
		def buildPerson():
			person = RRSPerson()
			person["full_name"] = rFullname
			person["url"] = personUrl
			self.__SetPersonNames(person, rFullname)
			person["full_name_ascii"] = unicodedata.normalize('NFKD', rFullname).encode('ascii', 'ignore')
			return person

		_id = self.__GetOrImportId("person", "full_name=", rFullname, buildPerson)
		publicationPerson = RRSRelationshipPersonPublication(author_rank=rank, editor=isEditor)
		publicationPerson.set_entity(self.rrsdb.load("person", _id))
		self.publication['person'] = publicationPerson

	def __SetPersonNames(self, person, rFullname):
		"""
		This function tries to fill first name, middle name, last name of
		person. Only full names made of two or three words are split; for
		any other word count the person is left untouched.

		@type  person: RRSPerson
		@param person: Object of author of publication.
		@type  rFullname: string
		@param rFullname: Fullname of author.
		"""
		splitName = rFullname.split()
		if len(splitName) == 3:
			person["first_name"], person["middle_name"], person["last_name"] = splitName
		elif len(splitName) == 2:
			person["first_name"], person["last_name"] = splitName

	def __ReportImportError(self, label, detail):
		"""
		Print and log (as warning) a failed import of the actual publication.

		@type  label: string
		@param label: Name of the error category.
		@param detail: Error description appended after the publication key.
		"""
		message = '%s - %s, %s' % (label, self.rPublication.key, detail)
		print(message)
		logging.warning(message)

	def FillPublication(self, key):
		"""
		This function calls all private functions with prefix Fill: it loads
		data to rPublication structure, assigns data from rPublication to
		publication (RRSPublication) and imports the publication. Errors
		raised by the import itself are printed and logged, not re-raised.

		@type  key: string
		@param key: Key of the publication.
		"""
		self.__FillRPublication(key)
		source = self.rPublication
		self.publication = RRSPublication()
		self.__FillAuthors(source.authors, False)
		self.__FillAuthors(source.editors, True)
		self.__FillPublisher()
		self.__FillType()
		self.__FillSeries()
		self.publication["title"] = source.title
		self.publication["title_normalized"] = self.norm.publication(source.title)

		if self.__HasValue(source.year):
			self.publication["year"] = int(source.year) # "2000" -> 2000

		if self.__HasValue(source.month):
			# only the first three letters are parsed as a month abbreviation
			self.publication["month"] = int(strptime(source.month[:3], '%b').tm_mon)

		if self.__HasValue(source.volume) and source.volume.isdigit():
			self.publication["volume"] = int(source.volume)

		# the digit check has to look at number itself (it used to test volume)
		if self.__HasValue(source.number) and source.number.isdigit():
			self.publication["number"] = int(source.number)

		if self.__HasValue(source.abstract):
			self.publication["abstract"] = source.abstract

		doi = self.__ExtractDoi(source.doi)
		if doi is not None:
			self.publication["doi"] = doi

		if self.__HasValue(source.firstpage) and self.__HasValue(source.lastpage):
			self.publication["pages"] = str(source.firstpage) + " - " + str(source.lastpage)

		self.publication["language"] = self.rrsdb.load('language', 1)
		self.publication.set("researchr_key", source.key, strict=False)
		try:
			self.importer.import_model(self.publication)
		except RRSDatabaseEntityError as e:
			self.__ReportImportError('RRSDatabaseEntityError', str(e))
		except DatabaseError as e:
			self.__ReportImportError('DatabaseError', str(e))
		except TypeError as e:
			self.__ReportImportError('TypeError', str(e))
		except Exception:
			# best effort: report and carry on, but let KeyboardInterrupt
			# and SystemExit through (a bare except would swallow them)
			self.__ReportImportError('Unexpected error', sys.exc_info()[0])

	def __FillRPublication(self, key):
		"""
		Fill rPublication object with data loaded from the researchr API.
		Fields of the API answer that are not listed below are ignored.

		@type  key: string
		@param key: Key of publication.
		"""
		self.rPublication = RPublication()
		publicationData = self.researchrClass.getPublication(key)
		# short random pause after every API request
		time.sleep(random.uniform(self.LimitMin, self.LimitMax))

		# API fields stored under an attribute of the same name
		sameName = (
			'abstract', 'address', 'authors', 'booktitle', 'conference',
			'conferenceYear', 'doi', 'editors', 'firstpage', 'key',
			'issuenumber', 'journal', 'lastpage', 'month', 'note', 'number',
			'organization', 'publisher', 'series', 'title', 'url', 'volume',
			'volumenumber', 'year',
		)
		attributeOf = dict((name, name) for name in sameName)
		# the only field stored under a different attribute name
		attributeOf['type'] = 'publication_type'

		for field, value in publicationData.items():
			attribute = attributeOf.get(field)
			if attribute is not None:
				setattr(self.rPublication, attribute, value)