def toggle_display() -> flask.Response:
    user = flask_security.current_user

    tagged = flask.request.form.getlist("displayed", type=int)
    untagged = flask.request.form.getlist("hidden", type=int)

    with model.get_session() as session:
        tagged = [
            session.get(orm_feeds.Feed, feed_id)
            for feed_id in tagged
        ]
        untagged = [
            session.get(orm_feeds.Feed, feed_id)
            for feed_id in untagged
        ]

    # Toggle semantics: feeds posted as "displayed" get hidden, and vice versa.
    if tagged:
        feed_db.upsert_display(user, *tagged, displayed=False)

    if untagged:
        feed_db.upsert_display(user, *untagged, displayed=True)

    return "", 200
def feed(feed_id: int = None) -> flask.Response:
    if not feed_id:
        feed_id = flask.request.args.get("feed", type=int)

    user = flask_security.current_user

    with model.get_session() as session:
        feed = session.get(
            orm_feeds.Feed,
            feed_id,
            options=[
                sqla.orm.selectinload(
                    orm_feeds.Feed.users.and_(
                        orm_users.User.UserID == user.UserID,
                    )
                ),
            ],
        )

        return flask.render_template(
            "feed.html",
            **context.base_context(),
            topnav_title=feed.Title,
            feed_row=feed,
            langs_all=schema_feeds.Language,
            lang_default=schema_feeds.Language.ENGLISH,
            feed_tags=db.all_tags(user, feed),
            feed_tags_not=db.all_tags(user, feed, False),
        )
def toggle_tags() -> flask.Response:
    feed_id = flask.request.form.get("feed_id", type=int)
    tagged = flask.request.form.getlist("tagged", type=int)
    untagged = flask.request.form.getlist("untagged", type=int)

    with model.get_session() as session:
        feed = session.get(orm_feeds.Feed, feed_id)
        tagged = [
            session.get(orm_feeds.Tag, tag_id)
            for tag_id in tagged
        ]
        untagged = [
            session.get(orm_feeds.Tag, tag_id)
            for tag_id in untagged
        ]

        # Toggle semantics: currently tagged entries are detached, and vice versa.
        db.detach_tags(feed, *tagged)
        db.attach_tags(feed, *untagged)

    return "", 200
def compute_score(
    user_id: int,
    lang: schema_feeds.Language,
    items: typing.List[orm_items.Item],
) -> typing.List[float]:
    """
    Computes item scores using the classifier.

    Args:
        user_id: User ID.
        lang: Language.
        items: List of item objects.

    Returns:
        List of scores between -1.0 and +1.0.
    """
    with model.get_session() as session:
        user = session.get(orm_users.User, user_id)

        clf = io.read_classifier(user, lang)
        titles = [build_feature(row_it) for row_it in items]
        targets = clf.predict_proba(titles)
        scores = 2. * targets[:, 1] - 1.

        return scores
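# A worked illustration of the score mapping in compute_score, using a
# hypothetical probability matrix in place of clf.predict_proba's output:
# column 1 holds the positive class, and p -> 2p - 1 rescales [0, 1] to [-1, +1].
import numpy as np

targets = np.array([[0.2, 0.8], [0.5, 0.5], [0.9, 0.1]])
scores = 2. * targets[:, 1] - 1.
print(scores)  # [ 0.6  0.  -0.8]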
def test_feeds():
    session = get_session()
    Feed = get_model('Feeds')
    Item = get_model('Items')

    read_feeds("The Atlantic")

    assert session.query(Item).join(Feed).filter(
        Feed.Title.like("The Atlantic%")
    ).count()
def test_xml_read():
    session = get_session()
    User = get_model('Users')
    Feed = get_model('Feeds')
    Tag = get_model('Tags')
    tags2feeds = get_table('Tags2Feeds')

    # Building a many-to-many relationship.
    Feed.tags = orm.relationship(
        Tag,
        secondary=tags2feeds,
        back_populates='feeds',
    )
    Tag.feeds = orm.relationship(
        Feed,
        secondary=tags2feeds,
        back_populates='tags',
    )

    user_id = session.query(User.UserID).filter(User.Name == "hansolo").scalar()

    for file_it in glob.glob(os.path.join("feeds.d", "*.xml")):
        tag_name = file_it[len("feeds.d") + 1:-len(".xml")]

        with open(file_it, 'r') as f:
            read_xml(f, user_id, tag_name)

        q = (session.query(Tag.feeds)
             .filter(
                 Tag.UserID == user_id,
                 Tag.Name == tag_name,
             ).count())
        assert q
def parse(input_dir=None, db_path=None, db_url=None):
    assert input_dir is not None and (db_path is not None or db_url is not None)

    init_set = set()
    for root, dirs, files in os.walk(input_dir):
        for file in files:
            if file.endswith('.txt'):
                exist_set = parse_single(os.path.join(root, file), db_path, db_url, init_set)
                init_set = init_set.union(exist_set)

    # Finally, resolve the internal citation relationships.
    print('Resolving internal citation relationships...')
    engine = get_engine(db_path, db_url)
    Base.metadata.create_all(engine)
    session = get_session(engine)

    session.execute(
        'INSERT INTO wos_inner_reference '
        'SELECT DISTINCT t1.document_unique_id AS citing_paper_id, t2.unique_id AS cited_paper_id '
        'FROM wos_reference t1 INNER JOIN wos_document t2 '
        'ON t1.document_md5 = t2.document_md5 OR t1.doi = t2.doi '
        'ORDER BY citing_paper_id, cited_paper_id')
    session.commit()

    # A paper must not cite itself.
    session.execute(
        'DELETE FROM wos_inner_reference WHERE citing_paper_id = cited_paper_id'
    )
    session.commit()
    session.close()
    print('Parsing finished')
def upsert_display(
    user: orm_users.User,
    *feeds: orm_feeds.Feed,
    displayed: bool = True,
):
    with model.get_session() as session:
        user = session.get(orm_users.User, user.UserID)

        for feed_it in feeds:
            feed_it = session.get(orm_feeds.Feed, feed_it.FeedID)

            if displayed:
                feed_it.users.append(user)
            else:
                # Detaching a feed that is not attached is a no-op.
                try:
                    feed_it.users.remove(user)
                except ValueError:
                    pass

        session.commit()
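# A minimal usage sketch for upsert_display, assuming the surrounding module's
# imports (model, orm_users, orm_feeds); the IDs below are illustrative only.
with model.get_session() as session:
    user = session.get(orm_users.User, 1)
    feed_a = session.get(orm_feeds.Feed, 10)
    feed_b = session.get(orm_feeds.Feed, 11)

upsert_display(user, feed_a, feed_b, displayed=True)  # attach both feeds
upsert_display(user, feed_a, displayed=False)         # detach feed_a again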
def all_tags(
    user: orm_users.User = None,
    feed: orm_feeds.Feed = None,
    flag: bool = True,
    display: bool = True,
) -> typing.List[orm_feeds.Tag]:
    q = sqla.select(
        orm_feeds.Tag,
    ).order_by(
        sqla.collate(orm_feeds.Tag.Name, "NOCASE"),
    )

    if user:
        q = q.where(orm_feeds.Tag.UserID == user.UserID)

    if feed:
        q_where = orm_feeds.Tag.feeds.any(
            orm_feeds.Feed.FeedID == feed.FeedID,
        )
        if not flag:
            q_where = ~q_where
        q = q.where(q_where)

    # The display filter needs a user; guard against user being None.
    if display and user:
        q = q.join(orm_feeds.Tag.feeds).join(
            orm_feeds.Feed.users.and_(
                orm_users.User.UserID == user.UserID,
            )
        ).distinct()

    with model.get_session() as session:
        return [e[0] for e in session.execute(q)]
def test_display():
    feeds = get_table('Feeds')
    display = get_table('Display')
    session = get_session()
    User = get_model('Users')
    Feed = get_model('Feeds')

    # Building a many-to-many relationship.
    Feed.users = orm.relationship(
        User,
        secondary=display,
        back_populates='feeds',
    )
    User.feeds = orm.relationship(
        Feed,
        secondary=display,
        back_populates='users',
    )

    user_id = session.query(User.UserID).filter(User.Name == "hansolo").scalar()
    display_count = session.query(User.feeds).filter(User.UserID == user_id).count

    if not display_count():
        # Mark every feed as displayed for the user.
        q = sql.select([
            sql.literal_column(str(user_id), type_=Integer).label('UserID'),
            feeds.c.FeedID,
        ])
        ins = display.insert().from_select(['UserID', 'FeedID'], q)
        with get_connection() as conn:
            conn.execute(ins.prefix_with("OR IGNORE"))

    assert display_count()
def tag() -> flask.Response:
    tag_id = flask.request.args.get('tag', type=int)

    with model.get_session() as session:
        tag = session.get(orm_feeds.Tag, tag_id)

        feeds_lang = {}
        feeds_lang_not = {}

        for lang_it in overview_db.all_langs():
            feeds_lang[lang_it] = tag_db.all_feeds(
                langs=[lang_it],
                tags=[tag],
            )
            feeds_lang_not[lang_it] = tag_db.all_feeds(
                langs=[lang_it],
                tags=[tag],
                tags_flag=False,
            )

        return flask.render_template(
            "tag.html",
            **context.base_context(),
            topnav_title=tag.Name,
            tag_row=tag,
            feeds_lang=feeds_lang,
            feeds_lang_not=feeds_lang_not,
        )
def draw_cooccurrence_network(net_type=None, db_path=None, output_path=None, top_n=30):
    assert net_type is not None and output_path is not None and db_path is not None
    engine = get_engine(db_path)
    session = get_session(engine)

    print('Processing co-occurrence data')
    graph_data = []
    data = []
    title = None

    if net_type == 'keyword':
        title = 'Author Keyword Co-occurrence Network'
        data = session.query(WosDocument.unique_id, func.group_concat(WosKeyword.keyword, ';'))\
            .join(WosKeyword).group_by(WosDocument.unique_id)
        filter_data = session.query(WosKeyword.keyword, func.count('*').label('num')) \
            .group_by(WosKeyword.keyword).order_by(desc('num'))
    elif net_type == 'keyword_plus':
        title = 'WoS Keyword Co-occurrence Network'
        data = session.query(WosDocument.unique_id, func.group_concat(WosKeywordPlus.keyword_plus, ';'))\
            .join(WosKeywordPlus).group_by(WosDocument.unique_id)
        filter_data = session.query(WosKeywordPlus.keyword_plus, func.count('*').label('num')) \
            .group_by(WosKeywordPlus.keyword_plus).order_by(desc('num'))
    elif net_type == 'author':
        title = 'Author Co-authorship Network'
        data = session.query(WosDocument.unique_id,
                             func.group_concat(WosAuthor.last_name + ',' + WosAuthor.first_name, ';'))\
            .join(WosAuthor).group_by(WosDocument.unique_id)
        filter_data = session.query(WosAuthor.last_name + ',' + WosAuthor.first_name,
                                    func.count('*').label('num')) \
            .group_by(WosAuthor.last_name + ',' + WosAuthor.first_name).order_by(desc('num'))
    else:
        print('Unhandled network type:', net_type)
        exit(-1)

    for row in data:
        row_split = row[1].split(';')
        if len(row_split) > 1:
            graph_data += list(combinations(row_split, 2))

    # `network` is the co-occurrence network over all keywords.
    print('Building the co-occurrence network')
    network = get_network(graph_data, directed=False)
    session.close()

    nx.write_graphml(network, 'test.gml')

    # Hide all but the top_n most frequent nodes.
    filter_nodes = [i[0] for i in filter_data[top_n:]]
    sub = nx.restricted_view(network, filter_nodes, [])

    # Largest connected subgraph:
    # sub = sorted(nx.connected_component_subgraphs(sub), key=len, reverse=True)[0]
    # print('Drawing the network')
    draw_net(sub, title=title, output_path=os.path.join(output_path, net_type))
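# A tiny illustration of the pair-building step above: each semicolon-joined
# record expands into all unordered pairs, which become edges in get_network.
from itertools import combinations

row = 'network;citation;bibliometrics'
print(list(combinations(row.split(';'), 2)))
# [('network', 'citation'), ('network', 'bibliometrics'), ('citation', 'bibliometrics')]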
def all_feeds(
    langs: typing.List[schema_feeds.Language] = None,
    tags: typing.List[orm_feeds.Tag] = None,
    tags_flag: bool = True,
    users: typing.List[orm_users.User] = None,
    users_flag: bool = True,
) -> typing.List[orm_feeds.Feed]:
    q = sqla.select(orm_feeds.Feed)
    q = feeds_order.order_title(q)

    if langs:
        q = feeds_filter.filter_languages(q, langs)

    if tags:
        q_where = orm_feeds.Feed.tags.any(
            orm_feeds.Tag.TagID.in_([tag_it.TagID for tag_it in tags]),
        )
        if not tags_flag:
            q_where = ~q_where
        q = q.where(q_where)

    if users:
        q_where = orm_feeds.Feed.users.any(
            orm_users.User.UserID.in_([user_it.UserID for user_it in users]),
        )
        if not users_flag:
            q_where = ~q_where
        q = q.where(q_where)

    with model.get_session() as session:
        return [e[0] for e in session.execute(q)]
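# A usage sketch for all_feeds, assuming the surrounding module's imports;
# `news_tag` is a hypothetical orm_feeds.Tag instance. The *_flag arguments
# invert their filter: tags_flag=False selects feeds *without* any given tag.
english_feeds = all_feeds(langs=[schema_feeds.Language.ENGLISH])
untagged_feeds = all_feeds(tags=[news_tag], tags_flag=False)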
def updated_items(
    user: orm_users.User,
    langs: typing.List[schema_feeds.Language],
    tags: typing.List[orm_feeds.Tag],
    start: datetime.datetime,
    finish: datetime.datetime,
    last: datetime.datetime = None,
    wall_mode: wall.WallMode = wall.WallMode.CLASSIC,
) -> typing.List[orm_items.Item]:
    q = sqla.select(orm_items.Item)
    q = items_filter.filter_display(q, user)
    q = items_filter.filter_dates(q, start, finish)
    q = items_filter.filter_dates(q, finish=last)

    if langs:
        q = feeds_filter.filter_languages(q, langs)

    if tags:
        q = feeds_filter.filter_tags(q, tags, user)

    q = items_filter.deduplicate_items(q)
    q = items_load.load_like(q, user)
    q = items_load.load_tags(q, user, feed_joined=True)
    q = wall.order_wall(q, wall_mode, user)

    with model.get_session() as session:
        res = [e[0] for e in session.execute(q).unique()]

    res = wall.sample_wall(res, wall_mode)
    return res
def __init__(self, session=None, *args, **kwargs):
    self.session = session or get_session()
    self._dirty = set()
    self._finalize = Finalize(self, self.sync, exitpriority=5)
    super(DatabaseScheduler, self).__init__(*args, **kwargs)
    self.max_interval = (kwargs.get('max_interval')
                         or self.app.conf.CELERYBEAT_MAX_LOOP_INTERVAL
                         or DEFAULT_MAX_INTERVAL)
def like(
    like_val: schema_items.Like = schema_items.Like.UP,
) -> flask.Response:
    user = flask_security.current_user
    item_id = flask.request.form.get("id", type=int)

    with model.get_session() as session:
        item = session.get(orm_items.Item, item_id)
        db.upsert_like(user, item, like_val)

    return "", 200
def delete_feed() -> flask.Response:
    feed_id = flask.request.form.get("feed", type=int)

    with model.get_session() as session:
        feed = session.get(orm_feeds.Feed, feed_id)
        feed_db.delete_feeds(feed)

    return flask.redirect(flask.url_for("overview.settings"))
def delete_tag() -> flask.Response:
    tag_id = flask.request.form.get("tag", type=int)

    with model.get_session() as session:
        tag = session.get(orm_feeds.Tag, tag_id)
        tag_db.delete_tags(tag)

    return flask.redirect(flask.url_for("overview.settings"))
def get_split_title_keyword_abstract(db_path=None, db_url=None, output_path='', foreground=False):
    assert (db_path is None and db_url is not None) or (db_path is not None and db_url is None)
    assert output_path is not None

    if db_path:
        engine = get_engine(db_path=db_path)
    else:
        engine = get_engine(db_url=db_url)
    session = get_session(engine)

    data = session.query(WosDocument).all()

    path = r'C:/Users/Tom/Desktop/bio_nature'
    if foreground:
        inner_path = path + '/foreground'
    else:
        inner_path = path + '/background'

    for document in data:
        title = document.title.strip() + '.'

        kw_str = ''
        # kw_str = ', '.join(document.keywords)
        for kw in document.keywords:
            kw_str += kw.keyword + '. '
        # kw_str = kw_str[:-2]

        kp_str = ''
        # kp_str = ', '.join(document.keyword_plus)
        for kp in document.keyword_plus:
            kp_str += kp.keyword_plus + '. '
        # kp_str = kp_str[:-2]

        if document.abs:
            abs_str = document.abs.replace('. ', '.\n')
        else:
            abs_str = ''

        out_str = '\n'.join([title, kw_str, kp_str, abs_str])

        filename = inner_path + '/{}-{}.txt'.format(document.unique_id, document.pub_year)
        with open(filename, mode='w', encoding='utf-8') as file:
            file.write(out_str)

        with open(path + ('/foreground.list' if foreground else '/background.list'),
                  mode='a', encoding='utf-8') as l:
            l.write(('foreground' if foreground else 'background')
                    + '/{}-{}.txt\n'.format(document.unique_id, document.pub_year))
def get_tags() -> typing.List[orm_feeds.Tag]:
    user = flask_security.current_user
    tags_name = flask.request.args.getlist("tag")

    q = sqla.select(
        orm_feeds.Tag,
    ).where(
        orm_feeds.Tag.UserID == user.UserID,
        orm_feeds.Tag.Name.in_(tags_name),
    )

    with model.get_session() as session:
        return [e[0] for e in session.execute(q)]
def test_xml_write():
    session = get_session()
    User = get_model('Users')
    Tag = get_model('Tags')

    user_id = session.query(User.UserID).filter(User.Name == "hansolo").scalar()
    tags_name = [e.Name for e in session.query(Tag).filter(Tag.UserID == user_id)]

    for tag_name in tags_name:
        file_path = os.path.join("feeds.d", tag_name + ".xml")

        with open(file_path, 'w') as f:
            write_xml(f, user_id, tag_name)

        assert os.path.isfile(file_path)
def all_likes(
    user: orm_users.User,
    lang: schema_feeds.Language,
    score: schema_items.Like = schema_items.Like.UP,
) -> typing.List[orm_items.Item]:
    q = sqla.select(orm_items.Item)
    q = items_filter.filter_lang(q, lang)
    q = items_filter.filter_like(q, score, user)
    q = items_order.order_date(q)
    q = items_load.load_feed(q, feed_joined=True)

    with model.get_session() as session:
        return [e[0] for e in session.execute(q)]
def all_langs(
    user: orm_users.User = None,
) -> typing.List[schema_feeds.Language]:
    q = sqla.select(
        orm_feeds.Feed.Language,
    ).order_by(
        sqla.collate(orm_feeds.Feed.Language, "NOCASE"),
    ).distinct()

    if user:
        q = feeds_filter.filter_display(q, user)

    with model.get_session() as session:
        return [schema_feeds.Language[e["Language"]] for e in session.execute(q)]
def run():
    session = get_session()

    class AveragePriceData(Model):
        # Columns: footnote_codes, item_name, end_year, area_name,
        # begin_year, area_code, item_code, begin_period, end_period.
        series_id = Text(primary_key=True)
        footnote_codes = Text()
        item_name = Text()
        begin_year = Integer()
        end_year = Integer()
        area_name = Text()
        area_code = Text()
        item_code = Text()
        begin_period = Text()
        end_period = Text()

    sync_table(AveragePriceData)

    # Read the master data (ap.series).
    series = pandas.read_csv(path.format("ap/ap.series"), sep='\t', skiprows=1,
                             names=["series_id", "area_code", "item_code", "footnote_codes",
                                    "begin_year", "begin_period", "end_year", "end_period"])

    # Not sure why we're getting extra spaces; clean that up.
    series["item_code"] = series["item_code"].map(lambda x: str(x).strip())
    series.set_index("series_id", inplace=True)

    # Load areas.
    area = pandas.read_fwf(path.format("ap/ap.area"), widths=[4, 100],
                           names=["area_code", "area_name"], skiprows=2)
    area.set_index("area_code", inplace=True)

    footnotes = pandas.read_fwf(path.format("ap/ap.footnote"), skiprows=1, widths=[1, 100],
                                names=["footnote_code", "footnote_text"])
    footnotes.set_index("footnote_code", inplace=True)

    items = pandas.read_fwf(path.format("ap/ap.item"), widths=[7, 100], skiprows=2,
                            names=["item_code", "item_name"])
    items.set_index("item_code", inplace=True)

    result = series.join(area, on="area_code").join(items, on="item_code")
    print(result.head(5))

    for k, v in result.iterrows():
        vals = v.to_dict()
        vals["series_id"] = k
        try:
            AveragePriceData.create(**vals)
        except Exception as e:
            print(e)
            print(vals)
            break
        print("Created {}".format(k))
def insert_tag(
    user: orm_users.User,
    name: str,
) -> orm_feeds.Tag:
    with model.get_session() as session:
        tag = orm_feeds.Tag(
            UserID=user.UserID,
            Name=name,
        )
        session.add(tag)
        session.commit()
        session.refresh(tag)

        return tag
def test_tags():
    feeds = get_table('Feeds')
    tags = get_table('Tags')
    tags2feeds = get_table('Tags2Feeds')
    session = get_session()
    User = get_model('Users')
    Feed = get_model('Feeds')
    Tag = get_model('Tags')

    # Building a many-to-many relationship.
    Feed.tags = orm.relationship(
        Tag,
        secondary=tags2feeds,
        back_populates='feeds',
    )
    Tag.feeds = orm.relationship(
        Feed,
        secondary=tags2feeds,
        back_populates='tags',
    )

    user_id = session.query(User.UserID).filter(User.Name == "hansolo").scalar()

    ins = tags.insert().values(UserID=user_id, Name="news")
    with get_connection() as conn:
        conn.execute(ins.prefix_with("OR IGNORE"))

    q = sql.select([
        tags.c.TagID,
        feeds.c.FeedID,
    ]).where(sql.and_(
        feeds.c.Title == "The Atlantic",
        tags.c.Name == "news",
    ))
    ins = tags2feeds.insert().from_select(
        [tags2feeds.c.TagID, tags2feeds.c.FeedID],
        q,
    )
    with get_connection() as conn:
        conn.execute(ins.prefix_with("OR IGNORE"))

    q = (session.query(Tag.feeds)
         .filter(
             Tag.Name == "news",
             Tag.UserID == user_id,
             Tag.feeds.any(Feed.Title.like("The Atlantic%")),
         ).count())
    assert q
def test_user_datastore():
    session = get_session()
    User = get_model('Users')

    try:
        user_datastore = get_user_datastore()
        user_datastore.create_user(
            name="hansolo",
            password="******",
            email="*****@*****.**",
        )
        session.commit()
    except exc.IntegrityError:
        session.rollback()

    assert session.query(User).filter(User.Name == "hansolo").count()
def last_updated(user_id: int = None) -> datetime.datetime:
    try:
        q = sqla.select(sqla.func.min(orm_feeds.Feed.Updated))

        if user_id:
            q = q.where(
                orm_feeds.Feed.users.any(orm_users.User.UserID == user_id),
            )

        with model.get_session() as session:
            res = session.execute(q).fetchone()[0]

        if res is None:
            raise IndexError
    except IndexError:
        # No feeds yet: fall back to the Unix epoch.
        res = datetime.datetime.utcfromtimestamp(0)

    return res
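# Behavior sketch for last_updated: on an empty database the MIN(...) scalar is
# NULL, the IndexError path fires, and callers receive the Unix epoch, so
# downstream datetime comparisons never see None.
never = datetime.datetime.utcfromtimestamp(0)  # 1970-01-01 00:00:00
print(last_updated() >= never)  # always True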
def update_feed(
    feed: orm_feeds.Feed,
    title: str,
    link: str,
    lang: schema_feeds.Language,
):
    with model.get_session() as session:
        feed = session.get(orm_feeds.Feed, feed.FeedID)

        feed.Title = title
        feed.Link = link
        feed.Language = lang.name

        session.commit()
def insert_feed(
    title: str,
    link: str,
    lang: schema_feeds.Language,
) -> orm_feeds.Feed:
    with model.get_session() as session:
        feed = orm_feeds.Feed(
            Title=title,
            Link=link,
            Language=lang.name,
        )
        session.add(feed)
        session.commit()
        session.refresh(feed)

        return feed
def __init__(self, model, session=None):
    self.app = current_app
    self.session = session or get_session()
    self.name = model.name
    self.task = model.task
    self.schedule = model.schedule
    self.args = json.loads(model.args or '[]')
    self.kwargs = json.loads(model.kwargs or '{}')
    self.total_run_count = model.total_run_count
    self.model = model
    self.options = {}  # need reconstruction

    if not model.last_run_at:
        model.last_run_at = self._default_now()

    orig = self.last_run_at = model.last_run_at
    if not is_naive(self.last_run_at):
        self.last_run_at = self.last_run_at.replace(tzinfo=None)
    assert orig.hour == self.last_run_at.hour  # timezone sanity
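# A small check of the timezone-stripping step above (illustrative): replace()
# drops tzinfo without shifting the clock time, which is what the sanity assert
# relies on.
import datetime

aware = datetime.datetime(2016, 5, 3, 9, 33, tzinfo=datetime.timezone.utc)
naive = aware.replace(tzinfo=None)
assert aware.hour == naive.hour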
def before_request():
    g.db_session = get_session()
import json
import sys

import model

session = model.get_session()


def parse_file(filename):
    with open(filename, 'rb') as f:
        data = json.loads(f.read())

    for item in data:
        """
        {
            "id": "U02HCHXGE",
            "team_id": "T024TC0TE",
            "name": "allison",
            "deleted": false,
            "status": null,
            "color": "8469bc",
            "real_name": "Allison Wilbur",
            "tz": "America\/Los_Angeles",
            "tz_label": "Pacific Standard Time",
            "tz_offset": -28800,
            "profile": {
                "first_name": "Allison",
                "last_name": "Wilbur",
                "title": "Support Ninja",
                "skype": "allison.wilbur",
                "phone": "5129442311",
def users():
    session = model.get_session()
    return ", ".join([
        "{} ({})".format(u.name.encode('utf8'), str(u))
        for u in session.query(model.User).all()
    ])
def get(self):
    self.session_start()
    model.get_session(self.session['id'])
    self.render("templates/index.html")
import tornado.ioloop
import tornado.web
from model import User, Message, get_session
from datetime import datetime

# Tornadail configuration
server_port = 2626                     # Default: 26
db_address = "sqlite:///tornadail.db"  # DB of your choice

session = get_session(db_address)


class ServerHandler(tornado.web.RequestHandler):
    def get(self):
        details = {
            'server': 'Tornadail',
            'version': '0.1',
            'protocol': 0.1,
        }
        self.write(details)
        self.set_status(200)


class UserHandler(tornado.web.RequestHandler):
    def get(self, username):
        messages = []
        for message in session.query(Message).all():
            m = {
                'sender': message.sender,
                'recipient': message.recipient,
                'time': str(message.time),
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
#=============================================================================
#     FileName: addtask
#         Desc:
#       Author: ge.jin
#        Email: [email protected]
#     HomePage: wwww.woqutech.com
#      Version: 0.0.1
#   LastChange: 5/3/16 9:33 AM
#      History:
#=============================================================================
'''
from celery import Celery, current_app

app = Celery('celerybeat-sqlalchemy')
app.config_from_object('settings')

from model import PeriodicTask, CrontabSchedule, IntervalSchedule, get_session

print(current_app.conf.CELERYBEAT_MAX_LOOP_INTERVAL)

session = get_session()
cs = CrontabSchedule(minute='*/5')
iss = IntervalSchedule(every=30, period='seconds')
session.add(cs)
# pt = PeriodicTask(name="sdisfsdffaf124asf", task="task_hello", crontab=cs, interval=iss, args='[]', kwargs='{}')
pt = PeriodicTask(name="sd124asf", task="task_hello", interval=iss, args='[]', kwargs='{}')
session.add(pt)
session.flush()  # note: a commit may still be required to persist the rows