def get_multiple_fulltext(cls, search_string):
    query = (models.Copr.query.order_by(desc(models.Copr.created_on))
             .join(models.User)
             .filter(models.Copr.deleted == False))
    if "/" in search_string:  # copr search by its full name
        if search_string[0] == '@':  # searching for @group/project
            group_name = "%{}%".format(search_string.split("/")[0][1:])
            project = "%{}%".format(search_string.split("/")[1])
            query = query.filter(
                and_(models.Group.name.ilike(group_name),
                     models.Copr.name.ilike(project),
                     models.Group.id == models.Copr.group_id))
            query = query.order_by(
                asc(func.length(models.Group.name) +
                    func.length(models.Copr.name)))
        else:  # searching for user/project
            user_name = "%{}%".format(search_string.split("/")[0])
            project = "%{}%".format(search_string.split("/")[1])
            query = query.filter(
                and_(models.User.username.ilike(user_name),
                     models.Copr.name.ilike(project),
                     models.User.id == models.Copr.user_id))
            query = query.order_by(
                asc(func.length(models.User.username) +
                    func.length(models.Copr.name)))
    else:  # fulltext search
        query = query.whooshee_search(search_string,
                                      whoosheer=CoprWhoosheer,
                                      order_by_relevance=100)
    return query

def dashboard(cls, author=None):
    """Return dashboard data for the administrator panel.

    :param author: author name, e.g. scc/cxw
    :return: dict
    """
    dash = {}
    total_published_article = db.session.query(func.count(Article.id)).\
        filter(Article.author == author, Article.status == "PUBLISHED").one()
    total_unpublished_article = db.session.query(func.count(Article.id)).\
        filter(Article.author == author, Article.status != "PUBLISHED").one()
    total_words = db.session.query(func.sum(func.length(Article.content))).\
        filter(Article.author == author).one()
    total_unpublished_words = db.session.query(
        func.sum(func.length(Article.content))).\
        filter(Article.author == author, Article.status != "PUBLISHED").one()
    last_article_date = db.session.query(Article.create_date).\
        filter(Article.author == author, Article.status == "PUBLISHED").\
        order_by(desc(Article.create_date)).first()
    df1 = db.session.query(func.strftime("%Y/%m", Article.create_date),
                           func.count(Article.id),
                           func.sum(func.length(Article.content))).\
        filter(Article.author == author, Article.status == "PUBLISHED").\
        group_by(func.strftime("%Y/%m", Article.create_date)).all()
    df2 = db.session.query(Article.tags).filter(
        Article.author == author, Article.status == "PUBLISHED").all()
    df2_list = list(itertools.chain(*df2))
    df2_list = [x.split(",") for x in df2_list if x is not None]
    df2_list = list(itertools.chain(*df2_list))
    unique_tag = list(set(df2_list))
    tag_counts = [df2_list.count(x) for x in unique_tag]
    dash.update({
        "total_published_article": total_published_article[0],
        "total_unpublished_article": total_unpublished_article[0],
        "total_words": total_words[0],
        "total_unpublished_words": total_unpublished_words[0],
        "last_article": last_article_date[0].strftime("%Y-%m-%d"),
        # materialize as lists: a bare map() is a one-shot iterator on
        # Python 3 and would render empty after its first traversal
        "df_1_month": [x[0] for x in df1],
        "df_1_counts": [x[1] for x in df1],
        "df_1_words": [x[2] for x in df1],
        "df_2_tags": unique_tag,
        "df_2_tag_counts": tag_counts
    })
    return dash

def sql_to_np(tbl, conn):
    """Converts a sql table to a Numpy structured array.

    Parameters
    ----------
    tbl : sqlalchemy.schema.table
        Table to convert
    conn : sqlalchemy.engine.Connectable
        Connection to use to connect to the database

    Returns
    -------
    A Numpy structured array

    """
    # todo sessionmaker is somehow supposed to be global
    Session = sessionmaker(bind=conn)
    session = Session()
    # first pass, we don't worry about string length
    dtype = []
    for col in tbl.columns:
        sql_type = col.type
        np_type = None
        try:
            np_type = sql_to_np_types[type(sql_type)]
        except KeyError:
            for base_sql_type in sql_to_np_types:
                if isinstance(sql_type, base_sql_type):
                    np_type = sql_to_np_types[base_sql_type]
                    break  # stop at the first matching base type
        if np_type is None:
            # TODO a more appropriate type of error
            raise KeyError('Type not found ' + str(sql_type))
        dtype.append((str(col.name), np_type))
    # now, we find the max string length for our char columns
    str_cols = [tbl.columns[col_name] for col_name, col_dtype in dtype
                if col_dtype == np.dtype(str)]
    query_funcs = [func.max(func.length(col)).label(col.name)
                   for col in str_cols]
    query = session.query(*query_funcs)
    str_lens = {col_name: str_len for col_name, str_len in zip(
        (desc['name'] for desc in query.column_descriptions),
        query.one())}

    def corrected_col_dtype(name, col_dtype):
        if col_dtype == np.dtype(str):
            return (name, '|S{}'.format(str_lens[name]))
        return (name, col_dtype)

    dtype_corrected = np.dtype([corrected_col_dtype(*dtype_tuple)
                                for dtype_tuple in dtype])
    # np.fromiter can't directly use the results of a query:
    # http://mail.scipy.org/pipermail/numpy-discussion/2010-August/052358.html
    # TODO deal with unicode (which numpy can't handle)
    return np.fromiter((np_process_row(row, dtype_corrected)
                        for row in session.query(tbl).all()),
                       dtype=dtype_corrected)

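The second pass above boils down to a single query computing MAX(LENGTH(col)) for every string column. A toy, self-contained illustration of just that query (table and data are hypothetical, not the caller's schema):

from sqlalchemy import Column, Integer, String, create_engine, func
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()

class Doc(Base):
    __tablename__ = 'doc'
    id = Column(Integer, primary_key=True)
    title = Column(String)
    body = Column(String)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
session.add(Doc(title='hi', body='hello world'))
session.commit()

# one round trip measures the longest value in each string column
title_len, body_len = session.query(
    func.max(func.length(Doc.title)),
    func.max(func.length(Doc.body))).one()
print(title_len, body_len)  # 2 11
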
def test_group_having_order():
    stmt = select([users.c.name,
                   func.count(addresses.c.id).label("address_count")]). \
        select_from(users.join(addresses)). \
        group_by(users.c.name). \
        having(func.length(users.c.name) > 4). \
        order_by(users.c.name.desc())
    conn = get_engine().connect()
    for row in conn.execute(stmt).fetchall():
        print(row)

def _incompatible_changes(metadata, migrate_engine):
    changes = sautils.Table('changes', metadata, autoload=True)
    c = changes.c
    q = sa.select([c.changeid]).where(or_(func.length(c.author) > 255,
                                          func.length(c.branch) > 255,
                                          func.length(c.revision) > 255,
                                          func.length(c.category) > 255))
    invalid_changes = q.execute().fetchall()
    errors = []
    if invalid_changes:
        def format(res):
            return ("    changes.change={id} "
                    "has author, branch, revision or category "
                    "longer than 255".format(id=res[0]))
        errors = ["- 'changes' table has invalid data:\n"
                  "{0}".format("\n".join(map(format, invalid_changes)))]
    return errors

def ham_country_grid_coords(call):
    if "sqlite" in db.engine.driver:
        q = (
            DxccPrefixes.query.filter(
                DxccPrefixes.call == func.substr(
                    call, 1, func.LENGTH(DxccPrefixes.call)))
            .order_by(func.length(DxccPrefixes.call).asc())
            .limit(1)
        )
    else:
        q = (
            DxccPrefixes.query.filter(
                DxccPrefixes.call == func.substring(
                    call, 1, func.LENGTH(DxccPrefixes.call)))
            .order_by(func.length(DxccPrefixes.call).asc())
            .limit(1)
        )
    if q.count() <= 0:
        return None
    qth = coords_to_qth(q[0].lat, q[0].long, 6)
    return {"qth": qth["qth"], "latitude": q[0].lat, "longitude": q[0].long}

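The filter above matches a stored prefix whenever it equals the leading characters of the callsign, then orders by prefix length. A standalone sketch of the same lookup (SQLite flavour; the Prefix model and sample data are illustrative, not part of the application above):

from sqlalchemy import Column, Integer, String, create_engine, func
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()

class Prefix(Base):
    __tablename__ = 'prefixes'
    id = Column(Integer, primary_key=True)
    call = Column(String, nullable=False)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
session.add_all([Prefix(call='F'), Prefix(call='FO'), Prefix(call='FO0')])
session.commit()

call = 'FO0XYZ'
# a stored prefix matches when it equals the leading characters of `call`;
# ordering by length ascending picks the shortest match, as above
match = (session.query(Prefix)
         .filter(Prefix.call == func.substr(call, 1, func.length(Prefix.call)))
         .order_by(func.length(Prefix.call).asc())
         .limit(1)
         .one())
print(match.call)  # 'F'
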
def get_average_tweet_length(self, user):
    if user is None:
        return str(round(
            db.session.query(func.avg(func.length(Tweet.text))).scalar(), 2))
    user = db.session.query(User).filter(
        func.lower(User.screen_name) == func.lower(user)).scalar()
    return str(round(user.get_avg_tweet_length(), 2))

def parents(cls, cip_id, **kwargs):
    show_all = kwargs.get("show_all", True)
    cips = []
    if len(cip_id) >= 4:
        cips.append(cip_id[:2])
    if len(cip_id) == 6:
        cips.append(cip_id[:4])
    if not show_all:
        cips = [cips[-1]]
    cips = Cip.query.filter(Cip.id.in_(cips)).order_by(
        asc(func.length(Cip.id))).all()
    return [attr.data_serialize() for attr in cips], Cip.HEADERS

def get_multiple_fulltext(cls, search_string):
    query = (models.Copr.query.join(models.User)
             .filter(models.Copr.deleted == False))
    if "/" in search_string:  # copr search by its full name
        if search_string[0] == '@':  # searching for @group/project
            group_name = "%{}%".format(search_string.split("/")[0][1:])
            project = "%{}%".format(search_string.split("/")[1])
            query = query.filter(and_(models.Group.name.ilike(group_name),
                                      models.Copr.name.ilike(project),
                                      models.Group.id == models.Copr.group_id))
            query = query.order_by(asc(func.length(models.Group.name) +
                                       func.length(models.Copr.name)))
        else:  # searching for user/project
            user_name = "%{}%".format(search_string.split("/")[0])
            project = "%{}%".format(search_string.split("/")[1])
            query = query.filter(and_(models.User.username.ilike(user_name),
                                      models.Copr.name.ilike(project),
                                      models.User.id == models.Copr.user_id))
            query = query.order_by(asc(func.length(models.User.username) +
                                       func.length(models.Copr.name)))
    else:  # fulltext search
        query = query.whooshee_search(search_string, whoosheer=CoprWhoosheer)
    return query

def get_data_query(self):
    all_inv = db_tables.clean_intervention_table
    student_lookup = all_inv.c.student_lookup
    school_year = sql.cast(
        db_func.substr(all_inv.c.school_year,
                       db_func.length(all_inv.c.school_year) - 3, 4),
        sql.INT).label('school_year')
    grade = all_inv.c.grade
    inv_group = all_inv.c.inv_group
    description = all_inv.c.description

    # FIXME: Make end year go up to the last year on record
    student_years = sql.select([
        student_lookup,
        school_year.label('end_year'),
        grade,
    ]).distinct(student_lookup, school_year, grade).where(
        grade >= 9).alias('student_years')

    student_invs = sql.select([
        student_lookup, school_year, grade, inv_group, description
    ]).where(grade >= features_config.min_grade).alias('student_invs')

    joined = sql.join(
        left=student_invs,
        right=student_years,
        onclause=sql.and_(
            student_invs.c.student_lookup == student_years.c.student_lookup,
            student_invs.c.school_year <= student_years.c.end_year))

    rate_col = db_func.count() * 1.0 / db_func.count(
        sql.distinct(joined.c.student_invs_school_year))

    inv_rates = sql.select([
        joined.c.student_invs_student_lookup.label('student_lookup'),
        joined.c.student_years_end_year.label('school_year'),
        joined.c.student_years_grade,
        joined.c.student_invs_inv_group.label('pivot_inv_group'),
        joined.c.student_invs_description.label('description'),
        rate_col.label('pivot_class1'),
        rate_col.label('pivot_class2'),
        rate_col.label('pivot_class3'),
    ]).select_from(joined).group_by(
        joined.c.student_invs_student_lookup,
        joined.c.student_years_end_year,
        joined.c.student_invs_inv_group,
        joined.c.student_invs_description,
        joined.c.student_years_grade,
    )
    return inv_rates

def get_all_active(cls):
    """Get all active crawler lists.

    :return: All active crawler lists.
    """
    try:
        active_lists = cls.query.filter(cls.is_active.is_(True)) \
            .filter(func.length(cls.list_url) > 0).all()
    except Exception as ex:
        current_app.logger.debug(ex)
        raise
    return active_lists

def _incompatible_object_state(metadata, migrate_engine):
    object_state = sautils.Table('object_state', metadata, autoload=True)
    c = object_state.c
    q = sa.select([c.objectid]).where(func.length(c.name) > 255)
    invalid_object_states = q.execute().fetchall()
    errors = []
    if invalid_object_states:
        def format(res):
            return ("    object_state.objectid={id}"
                    " has name longer than 255".format(id=res[0]))
        errors = ["- 'object_state' table has invalid data:\n"
                  "{0}".format("\n".join(map(format, invalid_object_states)))]
    return errors

def children(cls, naics_id, **kwargs):
    target_level = len(naics_id) + 1
    naics_map = {"31-33": ["31", "32", "33"], "44-45": ["44", "45"]}
    targets = [naics_id]
    if naics_id in naics_map:
        target_level = 3
        targets = naics_map[naics_id]
    # OR the prefixes together: a ranged id like "31-33" expands to several
    # mutually exclusive prefixes, which could never all match at once
    filters = [or_(*[Naics.id.startswith(target) for target in targets])]
    filters.append(Naics.id != naics_id)
    show_all = kwargs.get("show_all", False)
    if not show_all:
        filters.append(func.length(Naics.id) == target_level)
    naics = Naics.query.filter(*filters).distinct(Naics.id).all()
    return [attr.data_serialize() for attr in naics], Naics.HEADERS

def _incompatible_users(metadata, migrate_engine):
    users = sautils.Table('users', metadata, autoload=True)
    c = users.c
    q = sa.select([c.uid]).where(func.length(c.identifier) > 255)
    invalid_users = q.execute().fetchall()
    errors = []
    if invalid_users:
        def format(res):
            return ("    users.uid={id} "
                    "has identifier longer than 255".format(id=res[0]))
        errors = ["- 'users' table has invalid data:\n"
                  "{0}".format("\n".join(map(format, invalid_users)))]
    return errors

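The three _incompatible_* checks above share one shape: select row ids where func.length(column) exceeds 255. A hypothetical generalization, mirroring the bound-metadata style (q.execute()) the snippets use; the helper name and signature are illustrative, not from the original project:

import sqlalchemy as sa
from sqlalchemy import func
from sqlalchemy.sql import or_

def _find_overlong_rows(table, id_col, col_names, max_len=255):
    # returns ids of rows where any named column is longer than max_len;
    # requires the table's metadata to be bound to an engine (SQLAlchemy 1.x)
    q = sa.select([table.c[id_col]]).where(
        or_(*[func.length(table.c[name]) > max_len for name in col_names]))
    return q.execute().fetchall()
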
def _run_directory_match_update(self):
    rd: RpmDetail = aliased(RpmDetail)
    fd: FileDetail = aliased(FileDetail)
    lk: RpmFileDetailLink = aliased(RpmFileDetailLink)
    query = State.get_db_session().query(
        rd.rpm_detail_id, fd.file_detail_id
    ).join(
        ResolvedSymlinks,
        (rd.system_id == ResolvedSymlinks.system_id)
        & (ResolvedSymlinks.target_type == "D")
        & (func.length(rd.file_location) >
           func.length(ResolvedSymlinks.file_location))
        & (ResolvedSymlinks.file_location == func.substr(
            rd.file_location, 1, func.length(ResolvedSymlinks.file_location)))
    ).join(
        fd,
        (fd.system_id == ResolvedSymlinks.system_id)
        & (fd.file_location == (
            ResolvedSymlinks.resolved_location + func.substr(
                rd.file_location,
                func.length(ResolvedSymlinks.file_location) + 1)))
    ).outerjoin(
        lk,
        (lk.file_detail_id == fd.file_detail_id)
        & (lk.rpm_detail_id == rd.rpm_detail_id)
    ).filter(
        (rd.system_id == self.system_id)
        & (lk.rpm_file_detail_link_id == None)
        & (func.coalesce(fd.file_type, "") != "S")
    ).distinct()
    insert_dml = insert(RpmFileDetailLink).from_select([
        rd.rpm_detail_id,
        fd.file_detail_id,
    ], query)
    result = State.get_db_session().execute(insert_dml)
    State.get_db_session().flush()
    State.get_db_session().commit()
    self.analyze_database()
    return result.rowcount

def _inpho_token_generator(document):
    if PUNC_TABLE.get(ord('-')):
        del PUNC_TABLE[ord('-')]
    PUNC_TABLE[ord('\n')] = ord(' ')

    rest = document.lower()
    rest = rehyph(rest)
    rest = strip_punc_word(rest)
    query = Session.query(Searchpattern)

    MIN_LEN = 6
    short_patterns = Session.query(Searchpattern.searchpattern)
    short_patterns = short_patterns.filter(
        func.length(Searchpattern.searchpattern) < MIN_LEN)
    short_patterns = short_patterns.distinct().all()
    short_patterns = set(w[0] for w in short_patterns)

    while rest:
        if u' ' not in rest:
            yield rest
            return
        first, rest = rest.split(u' ', 1)
        rest = rest.strip()
        # always yield the raw string
        yield first
        # check if we can simply skip the short patterns
        if len(first) < MIN_LEN and first not in short_patterns:
            continue
        # search the database for keywords
        patterns = query.filter(
            Searchpattern.searchpattern.like(first + u' %')).all()
        exact_match = query.filter(
            Searchpattern.searchpattern == first).first()
        if exact_match is not None:
            patterns.append(exact_match)
        for p in patterns:
            # check if multi-phrase starts match in the rest of the phrase.
            if u' ' in p.searchpattern:
                first_pattern_word, longpattern = p.searchpattern.split(u' ', 1)
                if first == first_pattern_word and (
                        rest == longpattern or
                        rest.startswith(longpattern + u' ')):
                    yield u"inpho:{}".format(p.entity.ID)
            elif first == p.searchpattern:
                yield u"inpho:{}".format(p.entity.ID)

def children(cls, cip_id, **kwargs):
    show_all = kwargs.get("show_all", False)
    sumlevel = kwargs.get("sumlevel", False)
    filters = [Cip.id.startswith(cip_id), Cip.id != cip_id]
    if not show_all:
        # if we are not showing all children, then only display
        # cip attrs of length (parent length) + 2
        t_map = {0: 2, 1: 4, 2: 6}
        target = len(cip_id) + 2
        if sumlevel:
            target = t_map[int(sumlevel[0])]
        filters.append(func.length(Cip.id) == target)
    cips = Cip.query.filter(*filters).distinct(Cip.id).all()
    return [attr.data_serialize() for attr in cips], Cip.HEADERS

def tge():
    # Get the argument - accession
    accession = request.args['accession']

    # Find the TGE for the given accession number
    tge = TGE.query.filter_by(accession=accession).first_or_404()
    tgeObs = Observation.query.filter_by(tge_id=tge.id).order_by(
        desc(Observation.peptide_num))
    obsCount = tgeObs.count()

    pepLengths = Peptide.query.with_entities(
        func.length(Peptide.aa_seq).label('pepLength')).\
        join(TgeToPeptide).join(Observation).join(TGE).\
        filter_by(id=tge.id).group_by(Peptide.aa_seq).all()
    avgPeptNum = Observation.query.with_entities(
        func.avg(Observation.peptide_num).label('average')).\
        filter_by(tge_id=tge.id).one()

    # Flatten out the list of lists to lists (to use in the for loops)
    pepLengths = [item for sublist in pepLengths for item in sublist]

    avgPeptCov = '-'
    if sum(pepLengths):
        avgPeptCov = float(len(tge.amino_seq)) / sum(pepLengths)

    summary = {
        'tge': tge,
        'tgeObs': tgeObs,
        'organisms': tge.organisms,
        'avgPeptNum': avgPeptNum.average,
        'tgeClasses': tge.tge_class,
        'obsCount': obsCount,
        'uniprotIDs': tge.uniprot_id,
        'genes': tge.gene_names,
        'avgPeptCov': avgPeptCov
    }

    results = []
    for obs in tgeObs:
        exp = Experiment.query.with_entities(
            Experiment.id, Experiment.accession, Sample.name).\
            join(Sample).join(Observation).filter_by(id=obs.id).one()
        tgeType = re.search(r"(?<=type:).*?(?=\s)", obs.long_description).group(0)
        tgeLength = re.search(r"(?<=len:).*?(?=\s)", obs.long_description).group(0)
        tgeStrand = re.search(r"(?<=\().*?(?=\))", obs.long_description).group(0)
        peptides = Peptide.query.with_entities(Peptide.aa_seq).\
            join(TgeToPeptide).filter_by(obs_id=obs.id).\
            order_by(Peptide.aa_seq).all()
        peptides = [item for sublist in peptides for item in sublist]
        results.append({'id': obs.id, 'observation': obs.name,
                        'sampleName': exp.name, 'expAccession': exp.accession,
                        'expID': exp.id, 'type': tgeType, 'length': tgeLength,
                        'strand': tgeStrand, 'organism': obs.organism,
                        'uniprotID': obs.uniprot_id,
                        'peptide_num': obs.peptide_num,
                        'peptides': peptides})

    return render_template('results/tge.html', summary=summary, results=results)

def get_repo_destinations(self, repo_name):
    repo = Repository.get_by_repo_name(repo_name)
    if not repo:
        raise HTTPNotFound
    filter_query = request.GET.get('query')

    query = Repository.query() \
        .order_by(func.length(Repository.repo_name)) \
        .filter(or_(
            Repository.repo_name == repo.repo_name,
            Repository.fork_id == repo.repo_id))

    if filter_query:
        ilike_expression = u'%{}%'.format(safe_unicode(filter_query))
        query = query.filter(
            Repository.repo_name.ilike(ilike_expression))

    add_parent = False
    if repo.parent:
        if filter_query in repo.parent.repo_name:
            if not repo.parent.scm_instance().is_empty():
                add_parent = True

    limit = 20 - 1 if add_parent else 20
    all_repos = query.limit(limit).all()
    if add_parent:
        all_repos += [repo.parent]

    repos = []
    for obj in self.scm_model.get_repos(all_repos):
        repos.append({
            'id': obj['name'],
            'text': obj['name'],
            'type': 'repo',
            'obj': obj['dbrepo']
        })

    data = {
        'more': False,
        'results': [{
            'text': _('Repositories'),
            'children': repos
        }] if repos else []
    }
    return data

def apply_model(self, sm_account_id):
    BATCH_SIZE = 10000
    self.logger.info(
        "Starting model application for sm_account: {}".format(sm_account_id))
    self.load_model()
    model_application_log = ModelApplicationLog(
        sm_account_id=sm_account_id,
        category_group_id=self.category_group.category_group_id,
        model_id=self.training_log.id,
        start_time=datetime.now())
    with transaction.manager:
        self.session.add(model_application_log)
    user = self.session.query(User).filter(
        User.user_email == '*****@*****.**').first()
    self.logger.info("querying unlabelled interactions...")
    unlabelled = self.session.query(
        Interactions.id, Interactions.message
    ).filter(Interactions.sm_account_id == sm_account_id).filter(
        func.length(Interactions.message) > 0).filter(
        ~exists().where(
            Interactions.id == InteractionCategory.interaction_id)).filter(
        ~Interactions.is_page_post).yield_per(BATCH_SIZE)
    self.logger.info(unlabelled)
    interactions = []
    ids = []
    current_batch = 1
    processed_interactions = 0
    for i, row in enumerate(unlabelled):
        ids.append(row.id)
        interactions.append(row.message)
        if i % BATCH_SIZE == 0:
            self.logger.info(
                "predicting categories for batch:{}".format(current_batch))
            self.predict_batch(ids, interactions, user, sm_account_id)
            interactions = []
            ids = []
            current_batch += 1
        processed_interactions += 1
    self.predict_batch(ids, interactions, user, sm_account_id)
    with transaction.manager:
        model_application_log.end_time = datetime.now()
        model_application_log.processed_interactions = processed_interactions
        self.session.merge(model_application_log)

def api_entities():
    q = request.args.get('q', '').strip()
    try:
        limit = max(int(request.args.get('limit', 10)), 0)
    except (TypeError, ValueError):
        limit = 10

    query = Entity.query
    if q:
        q = '%' + q.replace('%', '%%').replace(' ', '%') + '%'
        query = query.filter(Entity.name.like(q))\
                     .order_by(func.length(Entity.name))

    entities = query.order_by(Entity.name)\
                    .limit(limit)\
                    .all()
    return jsonify({'entities': [e.json() for e in entities]})

def api_authors():
    q = request.args.get('q', '').strip()
    try:
        limit = max(int(request.args.get('limit', 10)), 0)
    except (TypeError, ValueError):
        limit = 10

    query = Author.query
    if q:
        q = '%' + q.replace('%', '%%').replace(' ', '%') + '%'
        query = query.filter(Author.name.like(q))\
                     .order_by(func.length(Author.name))

    authors = query.order_by(Author.name)\
                   .limit(limit)\
                   .all()
    return jsonify({'authors': [a.json() for a in authors]})

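Ordering LIKE matches by func.length puts the shortest (closest) names first, as both endpoints above do. A minimal, self-contained run of that ranking trick (the Org model and data are hypothetical):

from sqlalchemy import Column, Integer, String, create_engine, func
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()

class Org(Base):
    __tablename__ = 'org'
    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
session.add_all([Org(name='Anna Smith'), Org(name='Ann')])
session.commit()

pattern = '%' + 'ann'.replace(' ', '%') + '%'
hits = (session.query(Org)
        .filter(Org.name.like(pattern))
        .order_by(func.length(Org.name))  # shortest match ranks first
        .all())
print([o.name for o in hits])  # ['Ann', 'Anna Smith']
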
def upgrade():
    # re-size existing data if necessary
    identifier_map = table('cisco_csr_identifier_map',
                           column('ipsec_site_conn_id', sa.String(36)))
    ipsec_site_conn_id = identifier_map.columns['ipsec_site_conn_id']
    op.execute(identifier_map.update(values={
        ipsec_site_conn_id: expr.case(
            [(func.length(ipsec_site_conn_id) > 36,
              func.substr(ipsec_site_conn_id, 1, 36))],
            else_=ipsec_site_conn_id)}))

    # Need to drop foreign key constraint before mysql will allow changes
    with migration.remove_fks_from_table('cisco_csr_identifier_map'):
        op.alter_column(table_name='cisco_csr_identifier_map',
                        column_name='ipsec_site_conn_id',
                        type_=sa.String(36),
                        existing_nullable=False)

def upgrade():
    # re-size existing data if necessary
    identifier_map = table('cisco_csr_identifier_map',
                           column('ipsec_site_conn_id', sa.String(36)))
    ipsec_site_conn_id = identifier_map.columns['ipsec_site_conn_id']
    op.execute(
        identifier_map.update(values={
            ipsec_site_conn_id: expr.case(
                [(func.length(ipsec_site_conn_id) > 36,
                  func.substr(ipsec_site_conn_id, 1, 36))],
                else_=ipsec_site_conn_id)}))
    op.alter_column(table_name='cisco_csr_identifier_map',
                    column_name='ipsec_site_conn_id',
                    type_=sa.String(36),
                    existing_nullable=True)

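The truncation expression in both migrations reads: if the stored id is longer than 36 characters, keep only its first 36. A minimal standalone check of that CASE (SQLite; names illustrative, using the same SQLAlchemy 1.x list-style case()/select() as the migrations):

from sqlalchemy import create_engine, func, select
from sqlalchemy.sql import expression as expr

engine = create_engine('sqlite://')
conn = engine.connect()
val = 'x' * 40
stmt = select([expr.case([(func.length(val) > 36,
                           func.substr(val, 1, 36))],
                         else_=val)])
print(len(conn.execute(stmt).scalar()))  # 36
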
def fix_biz_id(num_to_fix, num_to_offset):
    """Moves biz_id entry to yelp_biz_id field for all reviews.

    num_to_fix is the number of entries to fix
    """
    # select only reviews where the 22-character yelp_biz_id
    # is in the biz_id field
    reviews = PlatePalReview.query.filter(
        func.length(PlatePalReview.biz_id) == 22).limit(num_to_fix).offset(
        num_to_offset)
    for review in reviews:
        yelp_biz_id = review.biz_id
        review_biz = PlatePalBiz.query.filter_by(
            yelp_biz_id=yelp_biz_id).first()
        review.biz_id = review_biz.biz_id
    db.session.commit()
    return

def index():
    utcnow = datetime.utcnow()
    session_date = int(utcnow.strftime("%Y%m%d"))
    cut_off = int((utcnow - timedelta(hours=24)).strftime("%Y%m%d"))
    users = []
    for u in User.query.all():
        s = Score.query.filter_by(user_id=u.id,
                                  study_y4md=session_date).first()
        num_pass = db.session.query(func.sum(Score.num_pass)).filter_by(
            user_id=u.id).filter(Score.study_y4md >= cut_off).first()[0]
        num_fail = db.session.query(func.sum(Score.num_fail)).filter_by(
            user_id=u.id).filter(Score.study_y4md >= cut_off).first()[0]
        cur_xpoints = db.session.query(func.sum(Score.xpoints)).filter_by(
            user_id=u.id).filter(Score.study_y4md >= cut_off).first()[0]
        st = {
            'name': u.name,
            'tot_xpoints': u.tot_xpoints,
            'streak': u.streak,
            'session_date': session_date,
            'num_pass': num_pass or 0,
            'num_fail': num_fail or 0,
            'num_thumb_up': 0,
            'cur_xpoints': cur_xpoints or 0,
            'last_study': u.last_study,
        }
        c = db.session.query(
            Word.word.distinct()).filter_by(user_id=u.id).filter(
            Word.streak >= 3).filter(func.length(Word.word) < 4).count()
        st['3streak'] = c
        t0 = utcnow + timedelta(hours=4)
        st['num_due'] = Word.query.filter_by(user_id=u.id).filter(
            Word.next_study < t0).count()
        users.append(st)
    users = sorted(users,
                   key=lambda x: (x['session_date'], x['cur_xpoints']),
                   reverse=True)
    return render_template("index.html", users=users,
                           session_date=session_date, utcnow=utcnow)

def get_negative():
    session = db_session()
    query = session.query(
        Statement.id.label('statement_id'),
        func.string_agg(
            TelegramTextMessage.message,
            aggregate_order_by(literal_column("'. '"),
                               TelegramTextMessage.created)
        ).label('agg_message')).\
        filter(Statement.reviewed == True).\
        filter(Statement.is_question == Statement.false_assumption).\
        filter(and_(TelegramTextMessage.channel_id == Statement.channel_id,
                    TelegramTextMessage.user_id == Statement.user_id)).\
        filter(TelegramTextMessage.message_id.between(
            Statement.first_msg_id, Statement.last_msg_id)).\
        group_by(Statement.id).\
        subquery()
    query_results = session.query(
        query.c.statement_id,
        query.c.agg_message,
        func.length(query.c.agg_message).label('len'),
        TelegramChannel.tags.label('tags')).\
        outerjoin(Statement, Statement.id == query.c.statement_id).\
        outerjoin(TelegramChannel,
                  TelegramChannel.channel_id == Statement.channel_id).\
        distinct().all()
    session.close()
    return query_results

def do_auto_review():
    session = db_session()
    subquery = session.query(
        Statement.id.label('statement_id'),
        func.length(func.string_agg(
            TelegramTextMessage.message,
            aggregate_order_by(literal_column("'. '"),
                               TelegramTextMessage.created)
        )).label('agg_message')).\
        filter(Statement.reviewed == False).\
        filter(and_(TelegramTextMessage.channel_id == Statement.channel_id,
                    TelegramTextMessage.user_id == Statement.user_id)).\
        filter(TelegramTextMessage.message_id.between(
            Statement.first_msg_id, Statement.last_msg_id)).\
        group_by(Statement.id).\
        subquery()
    query = session.query(subquery.c.statement_id).filter(
        subquery.c.agg_message < MINIMIM_QUESTION_LENGHT).subquery()
    stmt = update(Statement).where(Statement.id.in_(query)).values(
        reviewed=True, is_question=False, false_assumption=False)
    session.execute(stmt)
    session.commit()
    session.close()

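string_agg with aggregate_order_by is PostgreSQL-specific. This is the shared aggregation core of the two functions above, shown with the imports it needs (column names are taken from the snippets):

from sqlalchemy import func, literal_column
from sqlalchemy.dialects.postgresql import aggregate_order_by

# total length of a user's messages, concatenated with '. ' in creation order
agg_message_length = func.length(
    func.string_agg(
        TelegramTextMessage.message,
        aggregate_order_by(literal_column("'. '"),
                           TelegramTextMessage.created))
).label('agg_message')
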
def upgrade(): # re-size existing data if necessary identifier_map = table("cisco_csr_identifier_map", column("ipsec_site_conn_id", sa.String(36))) ipsec_site_conn_id = identifier_map.columns["ipsec_site_conn_id"] op.execute( identifier_map.update( values={ ipsec_site_conn_id: expr.case( [(func.length(ipsec_site_conn_id) > 36, func.substr(ipsec_site_conn_id, 1, 36))], else_=ipsec_site_conn_id, ) } ) ) op.alter_column( table_name="cisco_csr_identifier_map", column_name="ipsec_site_conn_id", type_=sa.String(36), existing_nullable=True, )
def get_user_groups(self, name_contains=None, limit=20, only_active=True):
    # TODO: mikhail: move this method to the UserGroupModel.
    query = self.sa.query(UserGroup)
    if only_active:
        query = query.filter(UserGroup.users_group_active == true())

    if name_contains:
        ilike_expression = u'%{}%'.format(safe_unicode(name_contains))
        query = query.filter(
            UserGroup.users_group_name.ilike(ilike_expression))\
            .order_by(func.length(UserGroup.users_group_name))\
            .order_by(UserGroup.users_group_name)
    query = query.limit(limit)
    user_groups = query.all()
    perm_set = ['usergroup.read', 'usergroup.write', 'usergroup.admin']
    user_groups = UserGroupList(user_groups, perm_set=perm_set)

    _groups = [
        {
            'id': group.users_group_id,
            # TODO: marcink figure out a way to generate the url for the
            # icon
            'icon_link': '',
            'value_display': 'Group: %s (%d members)' % (
                group.users_group_name, len(group.members),),
            'value': group.users_group_name,
            'value_type': 'user_group',
            'active': group.users_group_active,
        }
        for group in user_groups
    ]
    return _groups

def __init__(self):
    all_inv = db_tables.clean_intervention_table
    index_cols_dict = {
        'student_lookup': all_inv.c.student_lookup,
        'school_year': sql.cast(
            db_func.substr(all_inv.c.school_year,
                           db_func.length(all_inv.c.school_year) - 3, 4),
            sql.INT).label('school_year'),
        'grade': all_inv.c.grade
    }
    super(InvFeatures, self).__init__(
        table_name=inflection.underscore(InvFeatures.__name__),
        categorical_cols=inv_features_config.categorical_columns,
        post_features_processor=CompositeFeatureProcessor(
            [ImputeNullProcessor(fill_unspecified=0)]),
        data_table=all_inv,
        blocking_col=all_inv.c.inv_group,
        index_cols_dict=index_cols_dict)

def api_people():
    q = request.args.get('q', '').strip()
    try:
        limit = max(int(request.args.get('limit', 10)), 0)
    except (TypeError, ValueError):
        limit = 10

    if q and request.args.get('similar'):
        people = [p for p, _ in Person.similarly_named_to(q, 0.7)]
    else:
        query = Person.query\
            .options(joinedload(Person.affiliation))
        if q:
            q = '%' + q.replace('%', '%%').replace(' ', '%') + '%'
            query = query.filter(Person.name.like(q))\
                         .order_by(func.length(Person.name))
        people = query.order_by(Person.name)\
                      .limit(limit)\
                      .all()
    return jsonify({'people': [p.json() for p in people]})

def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_check_constraint('ck_question_body_len', 'question',
                               func.length(column('body')) > 0)
    op.create_check_constraint('ck_question_title_len', 'question',
                               func.length(column('title')) > 0)
    op.create_check_constraint('ck_question_user_len', 'question',
                               func.length(column('user')) > 0)
    op.create_check_constraint('ck_answer_body_len', 'answer',
                               func.length(column('body')) > 0)
    op.create_check_constraint('ck_answer_user_len', 'answer',
                               func.length(column('user')) > 0)
    op.create_check_constraint('ck_vote_user_len', 'vote',
                               func.length(column('user')) > 0)

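The matching downgrade is not shown above; a minimal sketch, assuming the constraint names used in upgrade():

def downgrade():
    # plausible counterpart to upgrade(): drop each CHECK constraint
    for name, tbl in [('ck_question_body_len', 'question'),
                      ('ck_question_title_len', 'question'),
                      ('ck_question_user_len', 'question'),
                      ('ck_answer_body_len', 'answer'),
                      ('ck_answer_user_len', 'answer'),
                      ('ck_vote_user_len', 'vote')]:
        op.drop_constraint(name, tbl, type_='check')
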
def stats(group_id):
    g = Group.query.get_or_404(group_id)
    s = dict()
    Histogram = namedtuple('Histogram', ['data', 'maxval'])

    s['nworks'] = g.entries.count()
    if s['nworks'] == 0:
        return redirect(url_for('group_detail', group_id=group_id))

    creators = (
        db.session.query(
            Entry.creator,
            func.count(Entry.id).label('nworks')
        )
        .filter(Entry.group_id == group_id)
        .group_by(Entry.creator)
    )
    s['ncreators'] = creators.count()
    s['nworks_top'] = creators.order_by(desc('nworks')).limit(20).all()

    creators = creators.subquery()
    nworks_data = db.session.query(
        creators.c.nworks,
        func.count(creators.c.nworks)
    ).group_by(creators.c.nworks).all()
    nworks_max = max(map(operator.itemgetter(1), nworks_data))
    s['nworks_hist'] = Histogram(nworks_data, nworks_max)

    months = db.session.query(
        func.extract('year', Entry.date_added).label('year'),
        func.extract('month', Entry.date_added).label('month'),
    ).filter(Entry.group_id == group_id).subquery()
    months = db.session.query(
        months.c.year,
        months.c.month,
        func.count(months.c.year)
    ).group_by(months.c.year, months.c.month).all()
    # materialize as a list: a bare map() would be exhausted by the max()
    # call below and leave the histogram empty on Python 3
    months_data = [("{:.0f}-{:02.0f}".format(*m), m[2]) for m in months]
    months_max = max(map(operator.itemgetter(1), months_data))
    s['months_hist'] = Histogram(months_data, months_max)

    lengths_data = (
        db.session.query(
            func.length(Entry.notes) / 200 * 200,
            func.count(Entry.id)
        )
        .filter(Entry.group_id == group_id)
        .group_by(func.length(Entry.notes) / 200).all()
    )
    lengths_max = max(map(operator.itemgetter(1), lengths_data))
    s['lengths_hist'] = Histogram(lengths_data, lengths_max)

    lengths = db.session.query(
        Entry.title,
        func.length(Entry.notes).label("length")
    ).filter(Entry.group_id == group_id).order_by(desc("length")).all()
    s['longest'] = lengths[:16]
    s['shortest'] = reversed(lengths[-16:])
    s['total_len'] = sum(map(operator.itemgetter(1), lengths))
    s['avg_len'] = 1.0 * s['total_len'] / s['nworks']

    return render_template('stats.html', group=g, stats=s)

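The length/200*200 pair in lengths_hist relies on SQL integer division: a 450-character note maps to bucket 400. A quick standalone check (SQLite):

from sqlalchemy import create_engine, func, select

engine = create_engine('sqlite://')
conn = engine.connect()
# integer division floors 450/200 to 2, then *200 gives the bucket floor
print(conn.execute(select([func.length('x' * 450) / 200 * 200])).scalar())  # 400
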
def with_mobile_content(self, query):
    return query.filter(func.length(ArticleData.mobile_content) > 0)

def build(self, filters_collection):
    self.__reInit()
    for f in filters_collection:
        cname = f['f']
        cond = f['c']
        val = f['v']
        t = self.__get_column_type(cname)
        if cond == '<':
            if t in ('BIGINT', 'INTEGER',):
                self.__query = self.__query.filter(
                    getattr(models.CdrRecord, cname) < int(val))
            elif t in ('DATETIME',):
                dt_val = datetime.datetime.fromtimestamp(time.mktime(
                    time.strptime(val, config.FORMAT_DATETIME_FILTER)))
                self.__query = self.__query.filter(
                    getattr(models.CdrRecord, cname) < dt_val)
        elif cond == '>':
            if t in ('BIGINT', 'INTEGER',):
                self.__query = self.__query.filter(
                    getattr(models.CdrRecord, cname) > int(val))
            elif t in ('DATETIME',):
                dt_val = datetime.datetime.fromtimestamp(time.mktime(
                    time.strptime(val, config.FORMAT_DATETIME_FILTER)))
                self.__query = self.__query.filter(
                    getattr(models.CdrRecord, cname) > dt_val)
        elif cond == '=':
            if t in ('BIGINT', 'INTEGER',):
                self.__query = self.__query.filter(
                    getattr(models.CdrRecord, cname) == int(val))
            elif t in ('VARCHAR(255)',):
                self.__query = self.__query.filter(
                    getattr(models.CdrRecord, cname) == val)
        elif cond == '!=':
            if t in ('BIGINT', 'INTEGER',):
                self.__query = self.__query.filter(
                    getattr(models.CdrRecord, cname) != int(val))
        elif cond == '<=':
            if t in ('BIGINT', 'INTEGER',):
                self.__query = self.__query.filter(
                    getattr(models.CdrRecord, cname) <= int(val))
        elif cond == '>=':
            if t in ('BIGINT', 'INTEGER',):
                self.__query = self.__query.filter(
                    getattr(models.CdrRecord, cname) >= int(val))
        elif cond == '%':
            if len(val) >= 2:
                if val[:1] == '%':
                    val = '%' + val
                elif val[-1:] == '%':
                    val += '%'
                self.__query = self.__query.filter(
                    getattr(models.CdrRecord, cname).like(val))
        elif cond == 'not(%)':
            if len(val) >= 2:
                if val[:1] == '%':
                    val = '%' + val
                elif val[-1:] == '%':
                    val += '%'
                self.__query = self.__query.filter(
                    getattr(models.CdrRecord, cname).notlike(val))
        elif cond == '= len(x)':
            self.__query = self.__query.filter(
                func.length(getattr(models.CdrRecord, cname)) == int(val))
        elif cond == '!= len(x)':
            self.__query = self.__query.filter(
                func.length(getattr(models.CdrRecord, cname)) != int(val))
        elif cond == '> len(x)':
            self.__query = self.__query.filter(
                func.length(getattr(models.CdrRecord, cname)) > int(val))
        elif cond == '< len(x)':
            self.__query = self.__query.filter(
                func.length(getattr(models.CdrRecord, cname)) < int(val))
    self.__result = self.__query \
        .order_by(models.CdrRecord.unix_time.desc()) \
        .limit(config.VIEW_LIMIT_VISIBLE_RECORDS) \
        .all()
    return self.__result

def test_executing(self):
    # re-create a new INSERT object
    self.ins = self.users.insert()

    # execute the insert statement
    res = self.conn.execute(self.ins, uid=1, name='jack', fullname='Jack Jones')
    assert(res.inserted_primary_key == [1])
    res = self.conn.execute(self.ins, uid=2, name='wendy', fullname='Wendy Williams')
    assert(res.inserted_primary_key == [2])

    # the res variable is a ResultProxy object, analogous to a DBAPI cursor

    # issue many inserts, the same is possible for update and delete
    self.conn.execute(self.addresses.insert(), [
        {'id': 1, 'user_id': 1, 'email_address': '*****@*****.**'},
        {'id': 2, 'user_id': 1, 'email_address': '*****@*****.**'},
        {'id': 3, 'user_id': 2, 'email_address': '*****@*****.**'},
        {'id': 4, 'user_id': 2, 'email_address': '*****@*****.**'}
    ])

    # test selects on the inserted values
    from sqlalchemy.sql import select

    s = select([self.users])
    res = self.conn.execute(s)
    u1 = res.fetchone()
    u2 = res.fetchone()

    # accessing rows
    assert(u1['name'] == u'jack')
    assert(u1['fullname'] == u'Jack Jones')
    assert(u2['name'] == u'wendy')
    assert(u2['fullname'] == u'Wendy Williams')
    assert(u1[1] == u1['name'])
    assert(u1[2] == u1['fullname'])
    assert(u2[1] == u2['name'])
    assert(u2[2] == u2['fullname'])

    # be sure to close the result set
    res.close()

    # use cols to access rows
    res = self.conn.execute(s)
    u3 = res.fetchone()
    u4 = res.fetchone()
    assert(u3[self.users.c.name] == u1['name'])
    assert(u3[self.users.c.fullname] == u1['fullname'])
    assert(u4[self.users.c.name] == u2['name'])
    assert(u4[self.users.c.fullname] == u2['fullname'])

    # reference individual columns in select clause
    s = select([self.users.c.name, self.users.c.fullname])
    res = self.conn.execute(s)
    u3 = res.fetchone()
    u4 = res.fetchone()
    assert(u3[self.users.c.name] == u1['name'])
    assert(u3[self.users.c.fullname] == u1['fullname'])
    assert(u4[self.users.c.name] == u2['name'])
    assert(u4[self.users.c.fullname] == u2['fullname'])

    # test joins
    # cartesian product
    usrs = [row for row in self.conn.execute(select([self.users]))]
    addrs = [row for row in self.conn.execute(select([self.addresses]))]
    prod = [row for row in self.conn.execute(select([self.users, self.addresses]))]
    assert(len(prod) == len(usrs) * len(addrs))

    # inner join on id
    s = select([self.users, self.addresses]).where(
        self.users.c.uid == self.addresses.c.user_id)
    inner = [row for row in self.conn.execute(s)]
    assert(len(inner) == 4)

    # operators between columns objects & other col objects/literals
    expr = self.users.c.uid == self.addresses.c.user_id
    assert('my_users.uid = addresses.user_id' == str(expr))

    # see how Teradata concats two strings
    assert(str((self.users.c.name + self.users.c.fullname)
               .compile(bind=self.engine)) ==
           'my_users.name || my_users.fullname')

    # built-in conjunctions
    from sqlalchemy.sql import and_, or_

    s = select([(self.users.c.fullname + ", " +
                 self.addresses.c.email_address).label('titles')]).where(
        and_(
            self.users.c.uid == self.addresses.c.user_id,
            self.users.c.name.between('m', 'z'),
            or_(
                self.addresses.c.email_address.like('*****@*****.**'),
                self.addresses.c.email_address.like('*****@*****.**')
            )
        )
    )
    # print(s)
    res = self.conn.execute(s)
    for row in res:
        assert(str(row[0]) == u'Wendy Williams, [email protected]')

    # more joins
    # ON condition auto generated based on ForeignKey
    assert(str(self.users.join(self.addresses)) ==
           'my_users JOIN addresses ON my_users.uid = addresses.user_id')

    # specify the join ON condition
    self.users.join(self.addresses,
                    self.addresses.c.email_address.like(self.users.c.name + '%'))

    # select from clause to specify tables and the ON condition
    s = select([self.users.c.fullname]).select_from(
        self.users.join(
            self.addresses,
            self.addresses.c.email_address.like(self.users.c.name + '%')))
    res = self.conn.execute(s)
    assert(len(res.fetchall()) == 3)

    # left outer joins
    s = select([self.users.c.fullname]).select_from(
        self.users.outerjoin(self.addresses))
    # outer join works with teradata dialect (unlike oracle dialect < version9)
    assert(str(s) == str(s.compile(dialect=self.dialect)))

    # test bind params (positional)
    from sqlalchemy import text
    s = self.users.select(self.users.c.name.like(
        bindparam('username', type_=String) + text("'%'")))
    res = self.conn.execute(s, username='******').fetchall()
    assert(len(res), 1)

    # functions
    from sqlalchemy.sql import func, column

    # certain function names are known by sqlalchemy
    assert(str(func.current_timestamp()), 'CURRENT_TIMESTAMP')

    # functions can be used in the select
    res = self.conn.execute(select(
        [func.max(self.addresses.c.email_address, type_=String).label(
            'max_email')])).scalar()
    assert(res, '*****@*****.**')

    # func result sets, define a function taking params x,y return q,z,r
    # useful for nested queries, subqueries - w/ dynamic params
    calculate = select([column('q'), column('z'), column('r')]).\
        select_from(
            func.calculate(
                bindparam('x'),
                bindparam('y')
            )
        )
    calc = calculate.alias()
    s = select([self.users]).where(self.users.c.uid > calc.c.z)
    assert('SELECT my_users.uid, my_users.name, my_users.fullname '
           'FROM my_users, (SELECT q, z, r '
           'FROM calculate(:x, :y)) AS anon_1 '
           'WHERE my_users.uid > anon_1.z', s)

    # instantiate the func
    calc1 = calculate.alias('c1').unique_params(x=17, y=45)
    calc2 = calculate.alias('c2').unique_params(x=5, y=12)
    s = select([self.users]).where(
        self.users.c.uid.between(calc1.c.z, calc2.c.z))
    parms = s.compile().params
    assert('x_2' in parms, 'x_1' in parms)
    assert('y_2' in parms, 'y_1' in parms)
    assert(parms['x_1'] == 17, parms['y_1'] == 45)
    assert(parms['x_2'] == 5, parms['y_2'] == 12)

    # order by asc
    stmt = select([self.users.c.name]).order_by(self.users.c.name)
    res = self.conn.execute(stmt).fetchall()
    assert('jack' == res[0][0])
    assert('wendy' == res[1][0])

    # order by desc
    stmt = select([self.users.c.name]).order_by(self.users.c.name.desc())
    res = self.conn.execute(stmt).fetchall()
    assert('wendy' == res[0][0])
    assert('jack' == res[1][0])

    # group by
    stmt = select([self.users.c.name, func.count(self.addresses.c.id)]).\
        select_from(self.users.join(self.addresses)).\
        group_by(self.users.c.name)
    res = self.conn.execute(stmt).fetchall()
    assert(res[1][0] == 'jack')
    assert(res[0][0] == 'wendy')
    assert(res[0][1] == res[1][1])

    # group by having
    stmt = select([self.users.c.name, func.count(self.addresses.c.id)]).\
        select_from(self.users.join(self.addresses)).\
        group_by(self.users.c.name).\
        having(func.length(self.users.c.name) > 4)
    res = self.conn.execute(stmt).fetchall()
    assert(res[0] == ('wendy', 2))

    # distinct
    stmt = select([self.users.c.name]).\
        where(self.addresses.c.email_address.contains(self.users.c.name)).\
        distinct()
    res = self.conn.execute(stmt).fetchall()
    assert(len(res) == 2)
    assert(res[0][0] != res[1][0])

    # limit
    stmt = select([self.users.c.name, self.addresses.c.email_address]).\
        select_from(self.users.join(self.addresses)).\
        limit(1)
    res = self.conn.execute(stmt).fetchall()
    assert(len(res) == 1)

    # offset

    # test union and except
    from sqlalchemy.sql import except_, union

    u = union(
        self.addresses.select().where(
            self.addresses.c.email_address == '*****@*****.**'),
        self.addresses.select().where(
            self.addresses.c.email_address.like('*****@*****.**')),
    )  # .order_by(self.addresses.c.email_address)
    # print(u)
    # res = self.conn.execute(u) this fails, syntax error: order by expects pos integer?

    u = except_(
        self.addresses.select().where(
            self.addresses.c.email_address.like('%@%.com')),
        self.addresses.select().where(
            self.addresses.c.email_address.like('*****@*****.**')))
    res = self.conn.execute(u).fetchall()
    assert(1, len(res))

    u = except_(
        union(
            self.addresses.select().where(
                self.addresses.c.email_address.like('*****@*****.**')),
            self.addresses.select().where(
                self.addresses.c.email_address.like('*****@*****.**'))
        ).alias().select(),
        self.addresses.select(
            self.addresses.c.email_address.like('*****@*****.**'))
    )
    res = self.conn.execute(u).fetchall()
    assert(1, len(res))

    # scalar subqueries
    stmt = select([func.count(self.addresses.c.id)]).where(
        self.users.c.uid == self.addresses.c.user_id).as_scalar()

    # we can place stmt as any other column within another select
    res = self.conn.execute(select([self.users.c.name, stmt])).fetchall()

    # res is a list of tuples, one tuple per user's name
    assert(2, len(res))
    u1 = res[0]
    u2 = res[1]
    assert(len(u1) == len(u2))
    assert(u1[0] == u'jack')
    assert(u1[1] == u2[1])
    assert(u2[0] == u'wendy')

    # we can label the inner query
    stmt = select([func.count(self.addresses.c.id)]).\
        where(self.users.c.uid == self.addresses.c.user_id).\
        label("address_count")
    res = self.conn.execute(select([self.users.c.name, stmt])).fetchall()
    assert(2, len(res))
    u1 = res[0]
    u2 = res[1]
    assert(len(u1) == 2)
    assert(len(u2) == 2)

    # inserts, updates, deletes
    stmt = self.users.update().values(fullname="Fullname: " + self.users.c.name)
    res = self.conn.execute(stmt)
    assert('name_1' in res.last_updated_params())
    assert(res.last_updated_params()['name_1'] == 'Fullname: ')

    stmt = self.users.insert().values(name=bindparam('_name') + " .. name")
    res = self.conn.execute(stmt, [
        {'uid': 4, '_name': 'name1'},
        {'uid': 5, '_name': 'name2'},
        {'uid': 6, '_name': 'name3'},
    ])

    # updates
    stmt = self.users.update().where(self.users.c.name == 'jack').values(name='ed')
    res = self.conn.execute(stmt)
    assert(res.rowcount == 1)
    assert(res.returns_rows is False)

    # update many with bound params
    stmt = self.users.update().where(self.users.c.name == bindparam('oldname')).\
        values(name=bindparam('newname'))
    res = self.conn.execute(stmt, [
        {'oldname': 'jack', 'newname': 'ed'},
        {'oldname': 'wendy', 'newname': 'mary'},
    ])
    assert(res.returns_rows is False)
    assert(res.rowcount == 1)

    res = self.conn.execute(select([self.users]).where(self.users.c.name == 'ed'))
    r = res.fetchone()
    assert(r['name'] == 'ed')

    # correlated updates
    stmt = select([self.addresses.c.email_address]).\
        where(self.addresses.c.user_id == self.users.c.uid).\
        limit(1)
    # this fails, syntax error bc of LIMIT - need TOP/SAMPLE instead
    # Note: TOP can't be in a subquery
    # res = self.conn.execute(self.users.update().values(fullname=stmt))

    # multiple table updates
    stmt = self.users.update().\
        values(name='ed wood').\
        where(self.users.c.uid == self.addresses.c.id).\
        where(self.addresses.c.email_address.startswith('ed%'))
    # this fails, teradata does update from set where not update set from where
    # res = self.conn.execute(stmt)

    stmt = self.users.update().\
        values({
            self.users.c.name: 'ed wood',
            self.addresses.c.email_address: '*****@*****.**'
        }).\
        where(self.users.c.uid == self.addresses.c.id).\
        where(self.addresses.c.email_address.startswith('ed%'))
    # fails but works on MySQL, should this work for us?
    # res = self.conn.execute(stmt)

    # deletes
    self.conn.execute(self.addresses.delete())
    self.conn.execute(self.users.delete().where(self.users.c.name > 'm'))

    # matched row counts
    # updates + deletes have a number indicating # rows matched by WHERE clause
    res = self.conn.execute(self.users.delete())
    assert(res.rowcount == 1)

def cip_filter(cls, level):
    if level == 'all':
        return True
    return func.length(cls.cip) == level

def acs_occ_filter(cls, level):
    if level == ALL:
        return True
    target = (int(level) * 2) + 2
    return func.length(cls.acs_occ) == target

def get_others_solutions(self, user):
    my_score = user.get_solution(self).score()
    return Solution.query.filter(
        and_(func.length(Solution.value) >= my_score,
             Solution.user != user.username,
             Solution.challenge == self)).all()

def process_update_recommendation_scores(payload):
    text_fields = [
        User.hometown,
        User.occupation,
        User.education,
        User.about_me,
        User.my_travels,
        User.things_i_like,
        User.about_place,
        User.additional_information,
        User.pet_details,
        User.kid_details,
        User.housemate_details,
        User.other_host_info,
        User.sleeping_details,
        User.area,
        User.house_rules,
    ]
    home_fields = [
        User.about_place,
        User.other_host_info,
        User.sleeping_details,
        User.area,
        User.house_rules,
    ]

    def poor_man_gaussian():
        """
        Produces an approximately std normal random variate
        """
        trials = 5
        return (sum([func.random() for _ in range(trials)]) - trials / 2) / sqrt(trials / 12)

    def int_(stmt):
        return func.coalesce(cast(stmt, Integer), 0)

    def float_(stmt):
        return func.coalesce(cast(stmt, Float), 0.0)

    with session_scope() as session:
        # profile
        profile_text = ""
        for field in text_fields:
            profile_text += func.coalesce(field, "")
        text_length = func.length(profile_text)
        home_text = ""
        for field in home_fields:
            home_text += func.coalesce(field, "")
        home_length = func.length(home_text)

        has_text = int_(text_length > 500)
        long_text = int_(text_length > 2000)
        has_pic = int_(User.avatar_key != None)
        can_host = int_(User.hosting_status == HostingStatus.can_host)
        cant_host = int_(User.hosting_status == HostingStatus.cant_host)
        filled_home = int_(User.last_minute != None) * int_(home_length > 200)
        profile_points = 2 * has_text + 3 * long_text + 2 * has_pic + 3 * can_host + 2 * filled_home - 5 * cant_host

        # references
        left_ref_expr = int_(1).label("left_reference")
        left_refs_subquery = (
            select(Reference.from_user_id.label("user_id"), left_ref_expr)
            .group_by(Reference.from_user_id)
            .subquery()
        )
        left_reference = int_(left_refs_subquery.c.left_reference)
        has_reference_expr = int_(func.count(Reference.id) >= 1).label("has_reference")
        ref_count_expr = int_(func.count(Reference.id)).label("ref_count")
        ref_avg_expr = func.avg(1.4 * (Reference.rating - 0.3)).label("ref_avg")
        has_multiple_types_expr = int_(func.count(distinct(Reference.reference_type)) >= 2).label("has_multiple_types")
        has_bad_ref_expr = int_(
            func.sum(int_((Reference.rating <= 0.2) | (~Reference.was_appropriate))) >= 1
        ).label("has_bad_ref")
        received_ref_subquery = (
            select(
                Reference.to_user_id.label("user_id"),
                has_reference_expr,
                has_multiple_types_expr,
                has_bad_ref_expr,
                ref_count_expr,
                ref_avg_expr,
            )
            .group_by(Reference.to_user_id)
            .subquery()
        )
        has_multiple_types = int_(received_ref_subquery.c.has_multiple_types)
        has_reference = int_(received_ref_subquery.c.has_reference)
        has_bad_reference = int_(received_ref_subquery.c.has_bad_ref)
        rating_score = float_(
            received_ref_subquery.c.ref_avg
            * (
                2 * func.least(received_ref_subquery.c.ref_count, 5)
                + func.greatest(received_ref_subquery.c.ref_count - 5, 0)
            )
        )
        ref_score = 2 * has_reference + has_multiple_types + left_reference - 5 * has_bad_reference + rating_score

        # activeness
        recently_active = int_(User.last_active >= now() - timedelta(days=180))
        very_recently_active = int_(User.last_active >= now() - timedelta(days=14))
        recently_messaged = int_(func.max(Message.time) > now() - timedelta(days=14))
        messaged_lots = int_(func.count(Message.id) > 5)
        messaging_points_subquery = (recently_messaged + messaged_lots).label("messaging_points")
        messaging_subquery = (
            select(Message.author_id.label("user_id"), messaging_points_subquery)
            .where(Message.message_type == MessageType.text)
            .group_by(Message.author_id)
            .subquery()
        )
        activeness_points = recently_active + 2 * very_recently_active + int_(messaging_subquery.c.messaging_points)

        # verification
        phone_verified = int_(User.phone_is_verified)
        cb_subquery = (
            select(
                ClusterSubscription.user_id.label("user_id"),
                func.min(Cluster.parent_node_id).label("min_node_id"),
            )
            .join(Cluster, Cluster.id == ClusterSubscription.cluster_id)
            .where(ClusterSubscription.role == ClusterRole.admin)
            .where(Cluster.is_official_cluster)
            .group_by(ClusterSubscription.user_id)
            .subquery()
        )
        min_node_id = cb_subquery.c.min_node_id
        cb = int_(min_node_id >= 1)
        f = int_(User.id <= 2)
        wcb = int_(min_node_id == 1)
        verification_points = 0.0 + 100 * f + 10 * wcb + 5 * cb

        # response rate
        t = (
            select(Message.conversation_id, Message.time)
            .where(Message.message_type == MessageType.chat_created)
            .subquery()
        )
        s = (
            select(Message.conversation_id, Message.author_id, func.min(Message.time).label("time"))
            .group_by(Message.conversation_id, Message.author_id)
            .subquery()
        )
        hr_subquery = (
            select(
                HostRequest.host_user_id.label("user_id"),
                func.avg(s.c.time - t.c.time).label("avg_response_time"),
                func.count(t.c.time).label("received"),
                func.count(s.c.time).label("responded"),
                float_(
                    extract(
                        "epoch",
                        percentile_disc(0.33).within_group(
                            func.coalesce(s.c.time - t.c.time, timedelta(days=1000))),
                    )
                    / 60.0
                ).label("response_time_33p"),
                float_(
                    extract(
                        "epoch",
                        percentile_disc(0.66).within_group(
                            func.coalesce(s.c.time - t.c.time, timedelta(days=1000))),
                    )
                    / 60.0
                ).label("response_time_66p"),
            )
            .join(t, t.c.conversation_id == HostRequest.conversation_id)
            .outerjoin(
                s,
                and_(s.c.conversation_id == HostRequest.conversation_id,
                     s.c.author_id == HostRequest.host_user_id),
            )
            .group_by(HostRequest.host_user_id)
            .subquery()
        )
        avg_response_time = hr_subquery.c.avg_response_time
        avg_response_time_hr = float_(extract("epoch", avg_response_time) / 60.0)
        received = hr_subquery.c.received
        responded = hr_subquery.c.responded
        response_time_33p = hr_subquery.c.response_time_33p
        response_time_66p = hr_subquery.c.response_time_66p
        response_rate = float_(responded / (1.0 * func.greatest(received, 1)))
        # be careful with nulls
        response_rate_points = -10 * int_(response_time_33p > 60 * 48.0) + 5 * int_(response_time_66p < 60 * 48.0)

        recommendation_score = (
            profile_points
            + ref_score
            + activeness_points
            + verification_points
            + response_rate_points
            + 2 * poor_man_gaussian()
        )

        scores = (
            select(User.id.label("user_id"), recommendation_score.label("score"))
            .outerjoin(messaging_subquery, messaging_subquery.c.user_id == User.id)
            .outerjoin(left_refs_subquery, left_refs_subquery.c.user_id == User.id)
            .outerjoin(received_ref_subquery, received_ref_subquery.c.user_id == User.id)
            .outerjoin(cb_subquery, cb_subquery.c.user_id == User.id)
            .outerjoin(hr_subquery, hr_subquery.c.user_id == User.id)
        ).subquery()

        session.execute(
            User.__table__.update().values(recommendation_score=scores.c.score).where(User.id == scores.c.user_id)
        )

    logger.info("Updated recommendation scores")

def naics_filter(cls, level):
    if level == ALL:
        return True
    target_len = int(level) + 2
    return func.length(cls.naics) == target_len

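cip_filter, acs_occ_filter and naics_filter all reduce to a func.length(col) == target predicate that drops straight into a query. A toy end-to-end run of the pattern (model, column and data are hypothetical, not from the projects above):

from sqlalchemy import Column, Integer, String, create_engine, func
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()

class Industry(Base):
    __tablename__ = 'industry'
    id = Column(Integer, primary_key=True)
    naics = Column(String, nullable=False)

    @classmethod
    def naics_filter(cls, level):
        # level "2" selects 4-character ids, mirroring naics_filter above
        if level == 'all':
            return True
        return func.length(cls.naics) == int(level) + 2

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
session.add_all([Industry(naics='31'), Industry(naics='3111')])
session.commit()

rows = session.query(Industry).filter(Industry.naics_filter('2')).all()
print([r.naics for r in rows])  # ['3111']
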
def test_executing(self): # re-create a new INSERT object self.ins = self.users.insert() # execute the insert statement res = self.conn.execute(self.ins, uid=1, name='jack', fullname='Jack Jones') assert(res.inserted_primary_key == [1]) res = self.conn.execute(self.ins, uid=2, name='wendy', fullname='Wendy Williams') assert(res.inserted_primary_key == [2]) # the res variable is a ResultProxy object, analagous to DBAPI cursor # issue many inserts, the same is possible for update and delete self.conn.execute(self.addresses.insert(), [ {'id': 1, 'user_id': 1, 'email_address': '*****@*****.**'}, {'id': 2, 'user_id': 1, 'email_address': '*****@*****.**'}, {'id': 3, 'user_id': 2, 'email_address': '*****@*****.**'}, {'id': 4, 'user_id': 2, 'email_address': '*****@*****.**'} ]) # test selects on the inserted values from sqlalchemy.sql import select s = select([self.users]) res = self.conn.execute(s) u1 = res.fetchone() u2 = res.fetchone() # accessing rows assert(u1['name'] == u'jack') assert(u1['fullname'] == u'Jack Jones') assert(u2['name'] == u'wendy') assert(u2['fullname'] == u'Wendy Williams') assert(u1[1] == u1['name']) assert(u1[2] == u1['fullname']) assert(u2[1] == u2['name']) assert(u2[2] == u2['fullname']) # be sure to close the result set res.close() # use cols to access rows res = self.conn.execute(s) u3 = res.fetchone() u4 = res.fetchone() assert(u3[self.users.c.name] == u1['name']) assert(u3[self.users.c.fullname] == u1['fullname']) assert(u4[self.users.c.name] == u2['name']) assert(u4[self.users.c.fullname] == u2['fullname']) # reference individual columns in select clause s = select([self.users.c.name, self.users.c.fullname]) res = self.conn.execute(s) u3 = res.fetchone() u4 = res.fetchone() assert(u3[self.users.c.name] == u1['name']) assert(u3[self.users.c.fullname] == u1['fullname']) assert(u4[self.users.c.name] == u2['name']) assert(u4[self.users.c.fullname] == u2['fullname']) # test joins # cartesian product usrs = [row for row in self.conn.execute(select([self.users]))] addrs = [row for row in self.conn.execute(select([self.addresses]))] prod = [row for row in self.conn.execute(select([self.users, self.addresses]))] assert(len(prod) == len(usrs) * len(addrs)) # inner join on id s = select([self.users, self.addresses]).where(self.users.c.uid == self.addresses.c.user_id) inner = [row for row in self.conn.execute(s)] assert(len(inner) == 4) # operators between columns objects & other col objects/literals expr = self.users.c.uid == self.addresses.c.user_id assert('my_users.uid = addresses.user_id' == str(expr)) # see how Teradata concats two strings assert(str((self.users.c.name + self.users.c.fullname).compile(bind=self.engine)) == 'my_users.name || my_users.fullname') # built-in conjunctions from sqlalchemy.sql import and_, or_ s = select([(self.users.c.fullname + ", " + self.addresses.c.email_address).label('titles')]).where( and_( self.users.c.uid == self.addresses.c.user_id, self.users.c.name.between('m', 'z'), or_( self.addresses.c.email_address.like('*****@*****.**'), self.addresses.c.email_address.like('*****@*****.**') ) ) ) # print(s) res = self.conn.execute(s) for row in res: assert(str(row[0]) == u'Wendy Williams, [email protected]') # more joins # ON condition auto generated based on ForeignKey assert(str(self.users.join(self.addresses)) == 'my_users JOIN addresses ON my_users.uid = addresses.user_id') # specify the join ON condition self.users.join(self.addresses, self.addresses.c.email_address.like(self.users.c.name + '%')) # select from clause to specify tables and 
the ON condition s = select([self.users.c.fullname]).select_from( self.users.join(self.addresses, self.addresses.c.email_address.like(self.users.c.name + '%'))) res = self.conn.execute(s) assert(len(res.fetchall()) == 3) # left outer joins s = select([self.users.c.fullname]).select_from(self.users.outerjoin(self.addresses)) # outer join works with teradata dialect (unlike oracle dialect < version9) assert(str(s) == str(s.compile(dialect=self.dialect))) # test bind params (positional) from sqlalchemy import text s = self.users.select(self.users.c.name.like( bindparam('username', type_=String)+text("'%'"))) res = self.conn.execute(s, username='******').fetchall() assert(len(res), 1) # functions from sqlalchemy.sql import func, column # certain function names are known by sqlalchemy assert(str(func.current_timestamp()), 'CURRENT_TIMESTAMP') # functions can be used in the select res = self.conn.execute(select( [func.max(self.addresses.c.email_address, type_=String).label( 'max_email')])).scalar() assert(res, '*****@*****.**') # func result sets, define a function taking params x,y return q,z,r # useful for nested queries, subqueries - w/ dynamic params calculate = select([column('q'), column('z'), column('r')]).\ select_from( func.calculate( bindparam('x'), bindparam('y') ) ) calc = calculate.alias() s = select([self.users]).where(self.users.c.uid > calc.c.z) assert('SELECT my_users.uid, my_users.name, my_users.fullname\ FROM my_users, (SELECT q, z, r\ FROM calculate(:x, :y)) AS anon_1\ WHERE my_users.uid > anon_1.z', s) # instantiate the func calc1 = calculate.alias('c1').unique_params(x=17, y=45) calc2 = calculate.alias('c2').unique_params(x=5, y=12) s = select([self.users]).where(self.users.c.uid.between(calc1.c.z, calc2.c.z)) parms = s.compile().params assert('x_2' in parms, 'x_1' in parms) assert('y_2' in parms, 'y_1' in parms) assert(parms['x_1'] == 17, parms['y_1'] == 45) assert(parms['x_2'] == 5, parms['y_2'] == 12) # order by asc stmt = select([self.users.c.name]).order_by(self.users.c.name) res = self.conn.execute(stmt).fetchall() assert('jack' == res[0][0]) assert('wendy' == res[1][0]) # order by desc stmt = select([self.users.c.name]).order_by(self.users.c.name.desc()) res = self.conn.execute(stmt).fetchall() assert('wendy' == res[0][0]) assert('jack' == res[1][0]) # group by stmt = select([self.users.c.name, func.count(self.addresses.c.id)]).\ select_from(self.users.join(self.addresses)).\ group_by(self.users.c.name) res = self.conn.execute(stmt).fetchall() assert(res[0][0] == 'jack') assert(res[1][0] == 'wendy') assert(res[0][1] == res[1][1]) # group by having stmt = select([self.users.c.name, func.count(self.addresses.c.id)]).\ select_from(self.users.join(self.addresses)).\ group_by(self.users.c.name).\ having(func.length(self.users.c.name) > 4) res = self.conn.execute(stmt).fetchall() assert(res[0] == ('wendy', 2)) # distinct stmt = select([self.users.c.name]).\ where(self.addresses.c.email_address.contains(self.users.c.name)).distinct() res = self.conn.execute(stmt).fetchall() assert(len(res) == 2) assert(res[0][0] != res[1][0]) # limit stmt = select([self.users.c.name, self.addresses.c.email_address]).\ select_from(self.users.join(self.addresses)).\ limit(1) res = self.conn.execute(stmt).fetchall() assert(len(res) == 1) # offset # test union and except from sqlalchemy.sql import except_, union u = union( self.addresses.select().where(self.addresses.c.email_address == '*****@*****.**'), self.addresses.select().where(self.addresses.c.email_address.like('*****@*****.**')),)# 
    # test union and except
    from sqlalchemy.sql import except_, union

    u = union(
        self.addresses.select().where(self.addresses.c.email_address == '*****@*****.**'),
        self.addresses.select().where(self.addresses.c.email_address.like('*****@*****.**')),
    )  # .order_by(self.addresses.c.email_address)
    # print(u)
    # res = self.conn.execute(u)  # this fails: syntax error, order by expects a positive integer?

    u = except_(
        self.addresses.select().where(self.addresses.c.email_address.like('%@%.com')),
        self.addresses.select().where(self.addresses.c.email_address.like('*****@*****.**')))
    res = self.conn.execute(u).fetchall()
    assert(len(res) == 1)

    u = except_(
        union(
            self.addresses.select().where(self.addresses.c.email_address.like('*****@*****.**')),
            self.addresses.select().where(self.addresses.c.email_address.like('*****@*****.**'))
        ).alias().select(),
        self.addresses.select(self.addresses.c.email_address.like('*****@*****.**'))
    )
    res = self.conn.execute(u).fetchall()
    assert(len(res) == 1)

    # scalar subqueries
    stmt = select([func.count(self.addresses.c.id)]).\
        where(self.users.c.uid == self.addresses.c.user_id).\
        as_scalar()

    # we can place stmt like any other column within another select
    res = self.conn.execute(select([self.users.c.name, stmt])).fetchall()

    # res is a list of tuples, one tuple per user's name
    assert(len(res) == 2)
    u1 = res[0]
    u2 = res[1]
    assert(len(u1) == len(u2))
    assert(u1[0] == u'jack')
    assert(u1[1] == u2[1])
    assert(u2[0] == u'wendy')

    # we can label the inner query
    stmt = select([func.count(self.addresses.c.id)]).\
        where(self.users.c.uid == self.addresses.c.user_id).\
        label("address_count")
    res = self.conn.execute(select([self.users.c.name, stmt])).fetchall()
    assert(len(res) == 2)
    u1 = res[0]
    u2 = res[1]
    assert(len(u1) == 2)
    assert(len(u2) == 2)

    # inserts, updates, deletes
    stmt = self.users.update().values(fullname="Fullname: " + self.users.c.name)
    res = self.conn.execute(stmt)
    assert('name_1' in res.last_updated_params())
    assert(res.last_updated_params()['name_1'] == 'Fullname: ')

    stmt = self.users.insert().values(name=bindparam('_name') + " .. name")
    res = self.conn.execute(stmt, [
        {'uid': 4, '_name': 'name1'},
        {'uid': 5, '_name': 'name2'},
        {'uid': 6, '_name': 'name3'},
    ])

    # updates
    stmt = self.users.update().where(self.users.c.name == 'jack').values(name='ed')
    res = self.conn.execute(stmt)
    assert(res.rowcount == 1)
    assert(res.returns_rows is False)

    # update many with bound params
    stmt = self.users.update().where(self.users.c.name == bindparam('oldname')).\
        values(name=bindparam('newname'))
    res = self.conn.execute(stmt, [
        {'oldname': 'jack', 'newname': 'ed'},
        {'oldname': 'wendy', 'newname': 'mary'},
    ])
    assert(res.returns_rows is False)
    assert(res.rowcount == 1)

    res = self.conn.execute(select([self.users]).where(self.users.c.name == 'ed'))
    r = res.fetchone()
    assert(r['name'] == 'ed')

    # correlated updates
    stmt = select([self.addresses.c.email_address]).\
        where(self.addresses.c.user_id == self.users.c.uid).\
        limit(1)
    # this fails with a syntax error because of LIMIT -- Teradata needs TOP/SAMPLE instead
    # (note: TOP can't be used in a subquery)
    # res = self.conn.execute(self.users.update().values(fullname=stmt))

    # multiple table updates
    stmt = self.users.update().\
        values(name='ed wood').\
        where(self.users.c.uid == self.addresses.c.id).\
        where(self.addresses.c.email_address.startswith('ed%'))
    # this fails: teradata expects UPDATE ... FROM ... SET ... WHERE,
    # not UPDATE ... SET ... FROM ... WHERE
    # res = self.conn.execute(stmt)

    stmt = self.users.update().\
        values({
            self.users.c.name: 'ed wood',
            self.addresses.c.email_address: '*****@*****.**'
        }).\
        where(self.users.c.uid == self.addresses.c.id).\
        where(self.addresses.c.email_address.startswith('ed%'))
    # fails here but works on MySQL -- should this work for us?
    # res = self.conn.execute(stmt)
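    # (Illustrative aside, not part of the original test: a portable rewrite of
    # the multiple-table UPDATE above is a single-table UPDATE driven by a
    # subquery; execution is left commented out to keep the test data unchanged.)
    upd = self.users.update().\
        where(self.users.c.uid.in_(
            select([self.addresses.c.id]).
            where(self.addresses.c.email_address.startswith('ed%')))).\
        values(name='ed wood')
    # res = self.conn.execute(upd)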
    # deletes
    self.conn.execute(self.addresses.delete())
    self.conn.execute(self.users.delete().where(self.users.c.name > 'm'))

    # matched row counts: updates and deletes report the number of rows
    # matched by the WHERE clause
    res = self.conn.execute(self.users.delete())
    assert(res.rowcount == 1)
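The commented-out order_by on the first union above fails because the compound SELECT is ordered by a table-qualified column, which some dialects reject. A minimal sketch of the usual fix, assuming an addresses table like the one in the test (the second LIKE pattern is a placeholder): order the union by the result-column label instead.

from sqlalchemy.sql import union

u = union(
    addresses.select().where(addresses.c.email_address.like('%@%.com')),
    addresses.select().where(addresses.c.email_address.like('%@%.net')),  # placeholder pattern
).order_by('email_address')  # refer to the label, not addresses.c.email_address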
def best_solution(self):
    # the shortest solution (by character count of its value) wins
    return Solution.query.filter_by(challenge=self).\
        order_by(func.length(Solution.value)).first()
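Ordering by func.length(Solution.value) makes this a shortest-answer-wins query. A minimal sketch, under the assumption that Solution has an integer primary key id, of a hypothetical companion method returning the n shortest solutions with a deterministic tiebreaker:

def shortest_solutions(self, n=3):
    # hypothetical helper, not in the original: the n shortest solutions,
    # ties broken by primary key so the ordering is stable
    return (Solution.query.filter_by(challenge=self)
            .order_by(func.length(Solution.value), Solution.id)
            .limit(n)
            .all())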
def get(self, request, *args, **kwargs):
    records = []
    thread_id = int(kwargs['thread_id'])
    with Session() as s:
        if 'timestamp' in kwargs:
            # API variant that returns a single record
            timestamp = int(kwargs['timestamp'])
            bin_id = a2b_hex(kwargs['record_id'])
            r = Record.get(s, thread_id, bin_id, timestamp).with_entities(
                Record.bin_id, Record.timestamp, Record.name, Record.mail,
                Record.body, sql_func.length(Record.attach).label('attach_len'),
                Record.suffix).first()
            if r:
                records.append({
                    'id': b2a_hex(r.bin_id).decode('ascii'),
                    'timestamp': int(datetime2timestamp(r.timestamp)),
                    'name': r.name,
                    'mail': r.mail,
                    'body': r.body,
                    'attach': bool(r.attach_len),
                    'suffix': r.suffix,
                })
        else:
            # API variant that returns multiple records
            bin_id = request.GET.get('record_id')
            if bin_id:
                bin_id = a2b_hex(bin_id)
            matchRecords = Record.gets(
                s,
                thread_id=thread_id,
                stime=intOrNone(request.GET.get('start_time')),
                etime=intOrNone(request.GET.get('end_time')),
                bin_id=bin_id,
                limit=intOrNone(request.GET.get('limit')),
            ).with_entities(
                Record.bin_id, Record.timestamp, Record.name, Record.mail,
                Record.body, sql_func.length(Record.attach).label('attach_len'),
                Record.suffix)
            for r in matchRecords:
                records.append({
                    'id': b2a_hex(r.bin_id).decode('ascii'),
                    'timestamp': int(datetime2timestamp(r.timestamp)),
                    'name': r.name,
                    'mail': r.mail,
                    'body': r.body,
                    'attach': bool(r.attach_len),
                    'suffix': r.suffix,
                })
    obj = {
        'records': records,
    }
    return JsonResponse(obj)
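Selecting sql_func.length(Record.attach) instead of the column itself lets the view report whether an attachment exists without pulling the BLOB across the wire. A minimal standalone sketch of the same trick; the direct filter columns are assumptions, since the original goes through Record.get:

from sqlalchemy import func as sql_func

def has_attachment(session, thread_id, bin_id, timestamp):
    # hypothetical helper: fetch only LENGTH(attach); a NULL or empty
    # attachment yields None/0, which bool() maps to False
    attach_len = session.query(sql_func.length(Record.attach)).\
        filter(Record.thread_id == thread_id,
               Record.bin_id == bin_id,
               Record.timestamp == timestamp).\
        scalar()
    return bool(attach_len)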