def _deliv_in_link(self,text,links,entry = False): ##print text ##print links #print "*"*40 _title = False _description = "" pattern = re.compile("(DELIVERABLES?)|(D[0-9][0-9]*(.[0-9][0-9]*)?)",re.I) _link = False for l in links: if pattern.search(l): if _link == False: _link =l else: return ['-3','Probably more records in one entry'] #loop through text in entry looking for title and description for t in text: if _title == False: if len(t)>10 : _title = t #set the longest string as description of deliverable if len(_description)<len(t): _description = t if _title == _description: _description = "" #if chosen title is not valid try to find better in parent entry if _title and not self._check_title(_title) and entry != False: _title = self._repair_title(entry) #create object if _link: pub = RRSPublication(title=_title,abstract=_description) typ = RRSPublication_type(type='techreport') pub['type'] = typ self.__debug("*"*40) self.__debug("Title: "+_title) self.__debug("Description: "+_description) self.__debug("Link: "+_link) l = RRSUrl(link=_link) pl_rel = RRSRelationshipPublicationUrl() pl_rel.set_entity(l) pub['url'] = pl_rel return pub else: #this entry is not probably deliverable return False
def _more_entry_in_record(self, entry): for ch in entry.iter('chunk'): if ch.text != None and ch.attrib.get("link") != None: if self.agent.is_wanted_mime(ch.attrib.get("link")): _pub = RRSPublication(title=ch.text) typ = RRSPublication_type(type='techreport') _pub['type'] = typ _l = RRSUrl(link=ch.attrib.get("link")) _rel = RRSRelationshipPublicationUrl() _rel.set_entity(_l) _pub['url'] = _rel self._entriesFoundInLinks.append(_pub)
def _more_entry_in_record(self,entry): for ch in entry.iter('chunk'): if ch.text != None and ch.attrib.get("link")!=None: if self.agent.is_wanted_mime(ch.attrib.get("link")): _pub= RRSPublication(title=ch.text) typ = RRSPublication_type(type='techreport') _pub['type'] = typ _l = RRSUrl(link=ch.attrib.get("link")) _rel = RRSRelationshipPublicationUrl() _rel.set_entity(_l) _pub['url'] = _rel self._entriesFoundInLinks.append(_pub)
def _more_entry_in_record(self, entry): """Function try to create array of deliverables from one entry in xml tree""" for ch in entry.iter("chunk"): if ch.text != None and ch.attrib.get("link") != None: if self.agent.is_wanted_mime(ch.attrib.get("link")): _pub = RRSPublication(title=ch.text) typ = RRSPublication_type(type="techreport") _pub["type"] = typ _l = RRSUrl(link=ch.attrib.get("link")) _rel = RRSRelationshipPublicationUrl() _rel.set_entity(_l) _pub["url"] = _rel self._entriesFoundInLinks.append(_pub)
def _deliv_in_text(self, text, links): #print text #print links #print "*"*40 _title = False _description = "" pattern = re.compile("(DELIVERABLES?)|(D[0-9][0-9]*(.[0-9][0-9]*)?)", re.I) #loop through text in entry looking for title and description for t in text: if _title == False: if pattern.search(t): _title = t #set the longest string as description of deliverable if len(_description) < len(t): _description = t if _title == _description: _description = "" _link = False if type(links) == str: if self.agent.is_wanted_mime(links): _link = links elif type(links) == list: for l in links: if self.agent.is_wanted_mime(l): if _link == False: _link = l else: #if there was already found link if _link[:s.rfind(_link, '.')] == l[:s.rfind(l, '.')]: break else: return ['-3', 'Probably more records in one entry'] #create object if _title: #print "TITLE:"+_title pub = RRSPublication(title=_title, abstract=_description) _typ = RRSPublication_type(type='techreport') pub['type'] = _typ self.__debug("*" * 40) self.__debug("Title: " + _title) self.__debug("Description: " + _description) if _link: #print "LINK:"+_link self.__debug("Link: " + _link) l = RRSUrl(link=_link) pl_rel = RRSRelationshipPublicationUrl() pl_rel.set_entity(l) pub['url'] = pl_rel return pub else: #this entry is not probably deliverable return False
def _deliv_in_text(self,text,links): #print text #print links #print "*"*40 _title = False _description = "" pattern = re.compile("(DELIVERABLES?)|(D[0-9][0-9]*(.[0-9][0-9]*)?)",re.I) #loop through text in entry looking for title and description for t in text: if _title == False: if pattern.search(t): _title = t #set the longest string as description of deliverable if len(_description)<len(t): _description = t if _title == _description: _description = "" _link = False if type(links) == str: if self.agent.is_wanted_mime(links): _link = links elif type(links) ==list: for l in links: if self.agent.is_wanted_mime(l): if _link == False: _link = l else: #if there was already found link if _link[:s.rfind(_link,'.')] == l[:s.rfind(l,'.')]: break else: return ['-3','Probably more records in one entry'] #create object if _title: #print "TITLE:"+_title pub = RRSPublication(title=_title,abstract=_description) _typ = RRSPublication_type(type='techreport') pub['type'] = _typ self.__debug("*"*40) self.__debug("Title: "+_title) self.__debug("Description: "+_description) if _link: #print "LINK:"+_link self.__debug("Link: "+_link) l = RRSUrl(link=_link) pl_rel = RRSRelationshipPublicationUrl() pl_rel.set_entity(l) pub['url'] = pl_rel return pub else: #this entry is not probably deliverable return False