def tag(self, tags):
    """Attach one tag or a list of tags to this entry.

    ``tags`` may be a single string or an iterable of strings.  One ``Tag``
    row is created per keyword, storing the keyword as given, its
    lower-cased form and this entry's ``id``.  All rows are added to the
    session and committed once at the end.
    """
    # On Python 2 text coming from feeds/the database is usually ``unicode``,
    # which is not an instance of ``str``; without this a single unicode tag
    # would be iterated character by character.
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        string_types = str  # Python 3
    if isinstance(tags, string_types):
        tags = [tags]

    # ``keyword`` rather than ``tag`` so the loop variable does not shadow
    # the method's own name.
    for keyword in tags:
        tag_ = Tag()
        tag_.keyword = keyword
        tag_.lower = keyword.lower()
        tag_.entry_id = self.id
        Session.add(tag_)
    Session.commit()
def fetch(feed):
    """Parse ``feed.url`` and store every entry whose URL is not yet known.

    A ``Fetch`` row recording the number of entries found in the parsed feed
    is added, and ``feed.last_fetched_at`` is set to
    ``datetime.datetime.now()``.  Each entry whose link is not already
    present in the ``Entry`` table is saved and committed individually.

    Returns the number of newly saved entries.
    """
    parsed = feedparser.parse(feed.url)
    items = parsed['entries']

    # Record this fetch together with how many entries the feed contained.
    fetch_log = Fetch()
    fetch_log.feed_id = feed.id
    fetch_log.result = str(len(items))
    Session.add(fetch_log)

    feed.last_fetched_at = datetime.datetime.now()
    Session.add(feed)

    saved = 0
    for item in items:
        link = item.get('link')
        if Session.query(Entry).filter_by(url=link).first():
            # An entry with this URL is already stored.
            continue

        # Feeds differ in which timestamp they supply: take the first one
        # available and fall back to the current gmtime if none is present.
        stamp = (item.get('published_parsed')
                 or item.get('updated_parsed')
                 or item.get('created_parsed')
                 or time.gmtime())
        # Convert the time tuple (year..second) to a datetime.
        pubtime = datetime.datetime(*stamp[:6])

        record = Entry()
        record.feed_id = feed.id
        record.title = item.get('title')
        record.feed_title = feed.title
        record.url = link
        record.pubtime = pubtime
        record.summary = item.get('summary')
        record.host = get_host(feed.weburl)
        Session.add(record)
        Session.commit()
        saved += 1

    Session.commit()
    return saved
# Command-line options: which INI file to load and whether to force a
# download.  ``--force`` is only parsed here; nothing in this part of the
# script reads ``options.force``.
parser.add_option('--ini',
                  help='INI file to use for application settings',
                  type='str',
                  default='development.ini')
help_ = 'Force a download of the feed even if its within throttle time.'
parser.add_option('--force', help=help_, action='store_true', default=False)
(options, args) = parser.parse_args()

# Load the application configuration from the chosen INI file, resolved
# relative to the current directory.
conf = appconfig('config:' + options.ini, relative_to='.')
load_environment(conf.global_conf, conf.local_conf)

# NOTE(review): engine, meta and conn are not used by the statements below;
# kept in case code outside this view relies on them.
engine = create_engine(conf['sqlalchemy.url'], echo=True)
meta = MetaData()
conn = engine.connect()

# Process every entry that has no tags yet.
entries = Session.query(Entry).filter_by(tags=None)
for e in entries:
    text = e.title
    # ``tag`` is defined outside this view; presumably it extracts a list of
    # keyword strings from the title -- TODO confirm.
    tags = tag(text)
    # Parenthesised form prints the same on Python 2 and is valid on Python 3.
    print(tags)
    # Store the tags on the entry as one space-separated string and also
    # create the individual tag rows through the entry's ``tag`` method.
    e.tags = ' '.join(tags)
    e.tag(tags)
    Session.add(e)
    Session.commit()