def _process(self):
    """Scrape every tracker URL and collect the media entries it yields.

    For each ``Url`` element of ``self.tracker`` the page is downloaded and
    scanned with the URL's first ``Filter`` regular expression.  Every match
    starts a fresh ``self.attributes`` dict; the remaining filters are then
    applied in order, each one to the page referenced by the current
    ``link`` attribute.  When the collected attributes contain both
    ``title`` and ``download`` a media object is built through
    ``factory.create`` and, if ``self.filter`` accepts it, fetched and
    appended to the result.

    Returns:
        The ``List('MediaList')`` holding every accepted media object.
    """
    media_list = List('MediaList')
    url_list = self.tracker.find_elements('Url')
    media_name = self.tracker.media or 'GenericMedia'
    media_tag = self.tracker.tag or 'Media'
    for url in url_list:
        content = url.urlopen(referer=self.tracker.url).read()
        filter_list = url.find_elements('Filter')
        if len(filter_list) == 0:
            continue
        self.log.finer(content+'\n')
        self.log.finest(filter_list[0].content+'\n')
        cre = re.compile(filter_list[0].content, filter_list[0].flags)
        match = cre.search(content)
        while match is not None:
            self.attributes = {}
            self._update_attributes(match, url.url)
            referer = url.url
            # Follow the chain of secondary filters; each one is applied to
            # the page the previously extracted 'link' points at.
            for chained in filter_list[1:]:
                fcre = re.compile(chained.content, chained.flags)
                link = self.attributes.get('link', '')
                if not link:
                    break
                temp_content = urlopen(link, referer=url.url).read()
                self.log.finer(temp_content+'\n')
                self.log.finest(chained.content+'\n')
                sub_match = fcre.search(temp_content)
                if not sub_match:
                    break
                self._update_attributes(sub_match, link)
                # The page that exposed the download location becomes the
                # referer used when the media is fetched.
                if 'download' in sub_match.groupdict():
                    referer = link
            self.log.fine('filter: %s\n' % str(self.attributes))
            if 'title' in self.attributes and 'download' in self.attributes:
                self.log.debug('filter: %s %s %s\n' % (
                    misc.string(self.attributes.get('title', '')),
                    misc.string(self.attributes.get('publisher', '')),
                    misc.string(self.attributes.get('download', ''))))
                # 'category' takes precedence over an already present 'type'.
                self.attributes['type'] = self.attributes.get(
                    'category', self.attributes.get('type', ''))
                self.attributes['link'] = self.attributes.get('download', '')
                media = factory.create(media_name, media_tag,
                                       attrs=self.attributes)
                if self.filter(media):
                    media.fetch(referer=referer)
                    media_list.append(media)
            # Resume scanning right after this match.  A zero-length match
            # would otherwise be found again at the same offset forever, so
            # step one character forward in that case.
            next_pos = match.end(0)
            if next_pos == match.start(0):
                next_pos += 1
            # search() clamps pos to the string length, so going past the
            # end has to terminate the scan explicitly.
            if next_pos > len(content):
                break
            match = cre.search(content, next_pos)
    return media_list
def set_attributes(self, attrs):
    """Load the declared attributes of this object from ``attrs``.

    ``self.attributes`` maps each attribute name to one of the type
    constants ``STRING``, ``INTEGER``, ``LONG``, ``FLOAT`` or ``LIST``.
    For every declared name the value found in ``attrs`` is passed through
    ``string`` (a missing name yields ``''``), converted according to its
    declared type and stored in the instance ``__dict__``.

    Args:
        attrs: Mapping supporting ``in`` and item access that carries the
            raw attribute values.
    """
    keys = self.attributes.keys()
    for key in keys:
        # Membership test replaces the deprecated has_key() call.
        if key in attrs:
            value = string(attrs[key])
        else:
            value = ''
        atype = self.attributes[key]
        if atype == STRING:
            pass
        elif atype == INTEGER:
            # Unparsable numbers silently fall back to zero.
            try:
                value = int(value)
            except ValueError:
                value = 0
        elif atype == LONG:
            try:
                value = long(value)
            except ValueError:
                # long(0) is the same value as the 0L literal but keeps the
                # source parseable where the L suffix is not accepted.
                value = long(0)
        elif atype == FLOAT:
            try:
                value = float(value)
            except ValueError:
                value = 0.0
        elif atype == LIST:
            # Lists are transported as comma separated text.
            value = value.split(',')
        self.__dict__[key] = value
def is_interest(self, media):
    """Rate ``media`` against this rule.

    The attribute named by ``self.attribute`` is read from ``media``,
    passed through ``string`` and searched with ``self.cre``.

    Args:
        media: Object whose attribute is inspected.

    Returns:
        int: ``1`` when the pattern is found and the mode is ``'include'``,
        ``-1`` when it is found and the mode is ``'exclude'``, ``0`` in
        every other case (including a missing attribute).
    """
    if not hasattr(media, self.attribute):
        return 0
    text = string(getattr(media, self.attribute))
    hit = self.cre.search(text)
    mode = self.mode
    if hit:
        if mode == 'include':
            return 1
        if mode == 'exclude':
            return -1
    return 0
def get_attributes(self):
    """Return the declared attributes as a name -> text dictionary.

    Every name listed in ``self.attributes`` is looked up in the instance
    ``__dict__``.  Values declared as ``LIST`` are joined with commas; all
    other values are passed through ``string``.

    Returns:
        dict: A new dictionary with one entry per declared attribute.
    """
    serialized = {}
    for name, kind in self.attributes.items():
        raw = self.__dict__[name]
        if kind == LIST:
            serialized[name] = ','.join(raw)
        else:
            serialized[name] = string(raw)
    return serialized
def create_string(self, parent, cur):
    """
    Build a string node (Expression).

    | Structure:
    |   String

    This method only forwards its arguments to ``misc.string``.

    Args:
        parent (Node): Reference to the parent node
        cur (int): Position at which the string was identified

    Returns:
        int: Position at which the string ends

    See also:
        :py:func:`matlab2cpp.tree.misc.string`
    """
    end = misc.string(self, parent, cur)
    return end
def __repr__(self):
    """Return ``<tag attributes>`` where attributes is the ``string``
    rendering of ``self.get_attributes()``."""
    tag = self.tag
    rendered = string(self.get_attributes())
    return '<%s %s>' % (tag, rendered)