def locate_disconnected_animals(settings_file, input_file=None, delete=False):
    """Connects to the database and locates all animals that are disconnected
    from the dataset.

    An animal counts as disconnected when it has no recorded sire, no
    recorded dam, and is itself not the sire or dam of any other animal.

    Args:
        settings_file: path handed to init() to build settings/engine/session.
        input_file: optional CSV path; when given, only Animal IDs appearing
            in it (keys of load_csv's result) are considered.
        delete: when True, the located animals are deleted and committed.
    """
    logging.info("Performing Disconnected Animal Detection")
    if input_file:
        logging.info("Will filter Animal IDs using data from %s" % input_file)
    if delete:
        logging.info("Will delete located Animals")
    settings, engine, session_class = init(settings_file)
    with closing(session_class()) as session:
        # Four aliases of Animal: the two parent roles, and children reached
        # through each parent role.
        sire = aliased(Animal)
        dam = aliased(Animal)
        child1 = aliased(Animal)
        child2 = aliased(Animal)
        # Outer-join all four pedigree links; a row where every joined side is
        # NULL has no links at all.  (`== None` is intentional: SQLAlchemy
        # renders it as IS NULL.)
        disconnected_animals = (
            session.query(Animal)
            .outerjoin(sire, Animal.sire_id == sire.id)
            .outerjoin(dam, Animal.dam_id == dam.id)
            .outerjoin(child1, Animal.id == child1.sire_id)
            .outerjoin(child2, Animal.id == child2.dam_id)
            .filter((sire.id == None) & (dam.id == None) & (child1.id == None) & (child2.id == None))
        )
        if input_file:
            disconnected_animals = disconnected_animals.filter(Animal.id.in_(load_csv(settings, input_file).keys()))
        logging.info("Detected %d total disconnected Animals" % disconnected_animals.count())
        logging.info("Disconnected Animal IDs: %s" % ",".join([str(animal.id) for animal in disconnected_animals]))
        if delete:
            logging.info("Deleting disconnected Animals")
            for animal in disconnected_animals:
                session.delete(animal)
            session.commit()
def _last_assoc_timestamps(session, dataset):
    """
    Get the timestamps of the latest assocxtrc per runningcatalog and band.

    We can't get the assoc ID's directly, because they are unique and can't
    by put in the group by. You can get the eventual assoc ID's by joining
    this query again with the assoc table (see last_assoc_per_band func)

    args:
        session (session): A SQLAlchemy session
        dataset (Dataset): A SQLALchemy dataset model
    returns: a SQLAlchemy subquery containing runcat id, timestamp, band id
    """
    # Named aliases keep the generated SQL readable and avoid clashes when
    # this subquery is combined with other queries over the same tables.
    a = aliased(Assocxtrsource, name='a_timestamps')
    e = aliased(Extractedsource, name='e_timestamps')
    r = aliased(Runningcatalog, name='r_timestamps')
    i = aliased(Image, name='i_timestamps')
    # runcat -> assoc -> extractedsource -> image, grouped per (runcat, band),
    # keeping the most recent image start time per group.
    return session.query(r.id.label('runcat'),
                         func.max(i.taustart_ts).label('max_time'),
                         i.band_id.label('band')
                         ). \
        select_from(r). \
        join(a, r.id == a.runcat_id). \
        join(e, a.xtrsrc_id == e.id). \
        join(i, i.id == e.image_id). \
        group_by(r.id, i.band_id). \
        filter(i.dataset == dataset). \
        subquery(name='last_assoc_timestamps')
def get_potential_fuzzy_matches(self, lang):
    """
    Returns a list of pairs (text,Parts) which may be fuzzy matches for this
    description. The part is the already translated version, included because
    we needed to look it up anyway for existence
    """
    session = Session.object_session(self)
    # Find all descriptions which share a part with this description
    PartDescr2=aliased(PartDescription)
    related_descrs = set(
        d for d, in session.query(PartDescr2.description_id).
        join(PartDescription, PartDescription.part_md5==PartDescr2.part_md5).
        filter(PartDescription.description_id==self.description_id))
    # Always add self, as part_description table is not complete
    related_descrs.add(self.description_id)
    # Finally, find all parts of all descriptions which have been
    # translated and and part of a package which share a source or
    # package
    # FIXME: don't use Description.package -> use package_version-tb
    Descr2 = aliased(Description)
    related_parts = session.query(Part, Descr2).join(PartDescription, PartDescription.part_md5 == Part.part_md5). \
        join(Descr2, Descr2.description_id == PartDescription.description_id). \
        join(Description, (Description.package == Descr2.package) | (Description.source == Descr2.source)). \
        filter(Description.description_id.in_(related_descrs)). \
        filter(Part.language == lang).all()
    # First we go through the descriptions, deconstructing them into parts.
    # Maps each part's md5 to its untranslated text part for the lookup below.
    descr_map = dict(
        (part_md5, part)
        for _, descr in related_parts
        for part, part_md5 in descr.get_description_parts()
    )
    # Pair every translated Part with the corresponding source text; .get()
    # yields None when the md5 was not produced by get_description_parts().
    result = [
        (descr_map.get(trans.part_md5), trans)
        for trans, _ in related_parts
    ]
    return result
def get_descendants_select(source_id):
    """Return a query that includes the descendants of an idea. (not idea itself yet.)

    Beware: we use a recursive query via a CTE and the PostgreSQL-specific
    ARRAY type. Blame this guy for that choice:
    http://explainextended.com/2009/09/24/adjacency-list-vs-nested-sets-postgresql/

    Also, that other guy provided insight into using CTE queries:
    http://stackoverflow.com/questions/11994092/how-can-i-perform-this-recursive-common-table-expression-in-sqlalchemy

    A literal column and an op complement nicely all this craziness.

    All I can say is SQLAlchemy kicks ass, and so does PostgreSQL.
    """
    # Anchor member of the recursive CTE: links leaving the source idea.
    link = select(
        [IdeaLink.source_id, IdeaLink.target_id]
    ).select_from(
        IdeaLink
    ).where(
        IdeaLink.source_id == source_id
    ).cte(recursive=True)
    # Recursive member: links whose source is a target already collected in
    # the CTE (source_alias refers back to the CTE itself).
    source_alias = aliased(link)
    targets_alias = aliased(IdeaLink)
    parent_link = targets_alias.source_id == source_alias.c.target_id
    children = select(
        [targets_alias.source_id, targets_alias.target_id]
    ).select_from(targets_alias).where(parent_link)
    with_children = link.union_all(children)
    return with_children
def get_description_predecessors(self):
    """Get all descriptions of the predecessors.

    Collects the description_ids of other package versions (and, via the
    legacy Description.package field, other descriptions) that share a
    package with this description, then resolves them to Description rows.

    Returns:
        list of Description objects for the predecessor descriptions.
    """
    session = Session.object_session(self)
    PackageVersion2 = aliased(PackageVersion)
    # SELECT B.description_id FROM package_version_tb AS A
    #   LEFT JOIN package_version_tb AS B ON A.package = B.package
    #   WHERE A.description_id = :id GROUP BY B.description_id;
    DescriptionIDs = [x for x, in session.query(PackageVersion2.description_id). \
                      join(PackageVersion, PackageVersion2.package == PackageVersion.package). \
                      filter(PackageVersion.description_id == self.description_id).\
                      filter(PackageVersion2.description_id != self.description_id). \
                      group_by(PackageVersion2.description_id).\
                      all()]
    # START REMOVE AFTER FIX
    # FIXME
    # use later only package_version_tb and not the old package field
    Description2 = aliased(Description)
    # BUG FIX: the exclusion filter must target the joined alias
    # (Description2).  The old code filtered
    # Description.description_id != self.description_id immediately after
    # filtering == self.description_id, so this query always returned
    # nothing.
    DescriptionIDs2 = [x for x, in session.query(Description2.description_id). \
                       join(Description, Description2.package == Description.package). \
                       filter(Description.description_id == self.description_id).\
                       filter(Description2.description_id != self.description_id). \
                       group_by(Description2.description_id). \
                       all()]
    DescriptionIDs += DescriptionIDs2
    # END REMOVE AFTER FIX
    result = session.query(Description).filter(Description.description_id.in_(DescriptionIDs)).all()
    return result
def test_hybrid_descriptor_three(self):
    # Regression test: a hybrid_property that simply proxies another mapped
    # column must resolve against the aliased entity, not the base class.
    class Point(object):
        def __init__(self, x, y):
            self.x, self.y = x, y

        @hybrid_property
        def x_alone(self):
            return self.x

    self._fixture(Point)
    alias = aliased(Point)
    eq_(str(Point.x_alone), "Point.x")
    eq_(str(alias.x_alone), "AliasedClass_Point.x")
    # On the class itself, the hybrid is indistinguishable from the plain
    # attribute it proxies.
    assert Point.x_alone is Point.x
    # Same alias on both sides: compares point_1 against itself...
    eq_(str(alias.x_alone == alias.x), "point_1.x = point_1.x")
    # ...while a second alias introduces a distinct point_2 name.
    a2 = aliased(Point)
    eq_(str(a2.x_alone == alias.x), "point_1.x = point_2.x")
    sess = Session()
    # Mixing the alias with the base class yields both the aliased table and
    # the plain table in FROM.
    self.assert_compile(
        sess.query(alias).filter(alias.x_alone > Point.x),
        "SELECT point_1.id AS point_1_id, point_1.x AS point_1_x, "
        "point_1.y AS point_1_y FROM point AS point_1, point "
        "WHERE point_1.x > point.x"
    )
def data(self, use_choice_labels=False, expand_collections=False,
         ignore_private=True):
    """Build the enrollment export query.

    Returns a SQLAlchemy query of flat enrollment rows (ids, patient/site/
    study info, consent dates, and audit user keys), ordered by enrollment
    id, study title and patient pid.

    NOTE(review): the three keyword parameters are not used in this body —
    presumably part of a shared exporter interface; confirm against the
    base class.
    """
    session = self.db_session
    # Separate aliases of the user table for the create/modify audit columns.
    CreateUser = aliased(datastore.User)
    ModifyUser = aliased(datastore.User)
    query = (
        session.query(
            models.Enrollment.id.label('id'),
            models.Patient.pid.label('pid'),
            models.Site.name.label('site'),
            models.Enrollment.id.label('enrollment_id'),  # BBB
            models.Study.title.label('study'),
            models.Patient.nurse.label('nurse'),  # BBB
            models.Enrollment.reference_number.label('reference_number'),
            models.Enrollment.consent_date.label('consent_date'),
            models.Enrollment.latest_consent_date.label(
                'latest_consent_date'),
            models.Enrollment.termination_date.label('termination_date'),
            models.Enrollment.create_date,
            CreateUser.key.label('create_user'),
            models.Enrollment.modify_date,
            ModifyUser.key.label('modify_user'))
        .select_from(models.Enrollment)
        .join(models.Enrollment.patient)
        .join(models.Enrollment.study)
        .join(models.Patient.site)
        # join(alias, relationship) binds the relationship's ON clause to the
        # aliased user table.
        .join(CreateUser, models.Enrollment.create_user)
        .join(ModifyUser, models.Enrollment.modify_user)
        .order_by(models.Enrollment.id,
                  models.Study.title,
                  models.Patient.pid))
    return query
def most_interacting_domains_from_3DID(session_3DID):
    """SQLAlchemy query returns an ordered list of all interacting pairs of
    domains where minimum number of interactions is 100.

    * **session_3DID** is SQLAlchemy session that this function should use.

    SQL equivalent:

    .. code-block:: sql

        SELECT p1.domain_id, p2.domain_id, COUNT(p1.domain_id) AS d1,
               COUNT(p2.domain_id) AS d2
        FROM PDB AS p1, Interacting_PDBs AS i1, PDB AS p2, Interacting_PDBs AS i2
        WHERE p1.id = i1.PDB_first_id
          AND p2.id = i2.PDB_second_id
          AND i1.id = i2.id
        GROUP BY p1.domain_id, p2.domain_id
        HAVING d1 > 100 AND d2 > 100
        ORDER BY d1, d2;
    """
    # Alias names mirror the table aliases in the SQL above so the generated
    # query matches the documented one.
    p1 = aliased(PDB, name='p1')
    p2 = aliased(PDB, name='p2')
    i1 = aliased(Interacting_PDBs, name='i1')
    i2 = aliased(Interacting_PDBs, name='i2')
    # Labeled counts so the same expressions can be reused in HAVING/ORDER BY.
    d1 = func.count(p1.domain_id).label('d1')
    d2 = func.count(p2.domain_id).label('d2')
    most_interacting = session_3DID.query(p1.domain_id, p2.domain_id, d1, d2).filter(p1.id==i1.PDB_first_id).filter(p2.id== i2.PDB_second_id).filter(i1.id==i2.id).group_by(p1.domain_id, p2.domain_id).having(d1 > 100).having(d2 > 100).order_by(d1, d2).all()
    return most_interacting
def unbind_objects_on_objects(req):
    # Re-parents every object contained in the reference object's bounding
    # box to the "world" frame (i.e. unbinds it from the reference object).
    # NOTE(review): the log tag and response type mirror
    # get_objects_within_object — looks copy/pasted; confirm the intended
    # message/response for the unbind service.
    rospy.loginfo( "SEMAP DB SRVs: get_objects_within_object" )
    call_get_objects_in_objects(req.reference_object_types, req.target_types, req.fully_within)
    res = GetObjectsWithinObjectResponse()
    obj1 = aliased( ObjectInstance )
    geo1 = aliased( GeometryModel )
    obj2 = aliased( ObjectInstance )
    geo2 = aliased( GeometryModel )
    # NOTE(review): req.target_object_types is read here but req.target_types
    # was passed above — one of the two field names is probably wrong; verify
    # against the service definition.
    if req.target_object_types:
        obj2_ids = any_obj_types_ids(obj2, req.target_object_types)
    else:
        obj2_ids = any_obj_ids(obj2)
    print req.reference_object_id
    print req.target_object_types
    print obj2_ids.all()
    rospy.loginfo("SEMAP DB SRVs: test_containment_relations3d tries to find em")
    # Target objects whose bounding box is 3D-contained in the reference
    # object's bounding box.
    ids = db().query( obj2.id ).\
        filter(obj1.id == req.reference_object_id, obj1.absolute_description_id == geo1.abstraction_desc, geo1.type == "BoundingBox", \
               obj1.frame_id == FrameNode.id, obj2.id.in_( obj2_ids ), obj2.absolute_description_id == geo2.abstraction_desc, geo2.type == "BoundingBox", \
               SFCGAL_Contains3D( geo1.geometry, geo2.geometry) ).all()
    res.target_object_ids = [id for id, in ids]
    # Move each contained object to the world frame (the actual unbinding).
    for id in res.target_object_ids:
        call_change_frame(id, "world", False)
    return res
def get_containment_pairs( req ):
    # Returns the (reference_id, target_id) pairs where the reference
    # object's bounding box 3D-contains the target object's bounding box.
    rospy.loginfo( "SEMAP DB SRVs: get_containment_pairs" )
    res = GetObjectLocation()
    obj1 = aliased( ObjectInstance )
    geo1 = aliased( GeometryModel )
    obj2 = aliased( ObjectInstance )
    geo2 = aliased( GeometryModel )
    if req.reference_object_types:
        # BUG FIX: was req.refrence_object_types (typo), which raised
        # AttributeError whenever reference types were supplied.
        obj1_ids = any_obj_types_ids(obj1, req.reference_object_types)
    else:
        obj1_ids = any_obj_ids(obj1)
    if req.target_object_types:
        obj2_ids = any_obj_types_ids(obj2, req.target_object_types)
    else:
        obj2_ids = any_obj_ids(obj2)
    ids = db().query( obj1.id, obj2.id ).\
        filter(obj1.id.in_( obj1_ids ), obj1.absolute_description_id == geo1.abstraction_desc, geo1.type == "BoundingBox", \
               obj2.id.in_( obj2_ids ), obj2.absolute_description_id == geo2.abstraction_desc, geo2.type == "BoundingBox", \
               SFCGAL_Contains3D( geo1.geometry, geo2.geometry) ).all()
    for i1, i2 in ids:
        pair = ObjectPair()
        pair.reference_id = i1
        pair.relations.append("contains")
        pair.target_id = i2
        # NOTE(review): these pairs were built but never attached to the
        # response in the original code; confirm whether GetObjectLocation
        # carries a pairs field before wiring them up.
    # BUG FIX: rows are (reference, target) 2-tuples; the old
    # "[id for id, in ids]" single-element unpacking raised ValueError.
    res.target_object_ids = [i2 for _, i2 in ids]
    return res
def test_containment_relations3d( req ): rospy.loginfo( "SEMAP DB SRVs: test_containment_relations3d" ) res = GetDirectionalRelations2DResponse() obj1 = aliased( ObjectInstance ) geo1 = aliased( GeometryModel ) obj2 = aliased( ObjectInstance ) geo2 = aliased( GeometryModel ) geos = db().query( SFCGAL_Contains3D( geo1.geometry, geo2.geometry) ).\ filter(obj1.id == req.reference_id, \ obj1.absolute_description_id == geo1.abstraction_desc, geo1.type == "BoundingBox", \ obj2.id == req.target_id, \ obj2.absolute_description_id == geo2.abstraction_desc, geo2.type == "BoundingBox" ).all() print geos for geoI, geoII in geos: print 'geoI', db().execute( ST_AsText( geoI.geometry) ).scalar() #print 'isValid', db().execute( SFCGAL_IsValid( geoI.geometry) ).scalar() print 'geoI', db().execute( ST_AsText( geoII.geometry) ).scalar() #print 'isValid', db().execute( SFCGAL_IsValid( geoII.geometry) ).scalar() containment_status = db().execute( SFCGAL_Contains3D( geoI.geometry, geoII.geometry ) ).scalar() print 'containment_status:', containment_status #if containment_status: # rospy.loginfo("OBJECT CONTAINMENT VERIFIED!") #else: # rospy.loginfo("OBJECT CONTAINMENT REJECTED!") return res
def get_objects_within_object( req ):
    # Returns the ids of target objects whose bounding box lies within the
    # reference object's bounding box — fully (containment only) or
    # partially (containment OR intersection) depending on req.fully_within.
    rospy.loginfo( "SEMAP DB SRVs: get_objects_within_object" )
    res = GetObjectsWithinObjectResponse()
    ref_obj = aliased( ObjectInstance )
    ref_geo = aliased( GeometryModel )
    tar_obj = aliased( ObjectInstance )
    tar_geo = aliased( GeometryModel )
    # Candidate targets: restricted by type when given, otherwise everything.
    if req.target_object_types:
        tar_ids = any_obj_types_ids(tar_obj, req.target_object_types)
    else:
        tar_ids = any_obj_ids(tar_obj)
    # Spatial predicate chosen up-front and injected into the query below.
    if req.fully_within:
        operator = SFCGAL_Contains3D(ref_geo.geometry, tar_geo.geometry)
    else:
        operator = or_( SFCGAL_Contains3D(ref_geo.geometry, tar_geo.geometry),
                        SFCGAL_Intersects3D(ref_geo.geometry, tar_geo.geometry) )
    ids = db().query( tar_obj.id ).\
        filter( ref_obj.id == req.reference_object_id,
                ref_obj.absolute_description_id == ref_geo.abstraction_desc, ref_geo.type == "BoundingBox",
                tar_obj.id.in_( tar_ids ),
                tar_obj.absolute_description_id == tar_geo.abstraction_desc, tar_geo.type == "BoundingBox",
                operator ).all()
    res.target_object_ids = [id for id, in ids]
    return res
def get_objects_within_range2d( req ):
    # Finds objects whose selected 2D geometry lies within req.distance of a
    # query point.
    rospy.loginfo( "SEMAP DB SRVs: get_objects_within_range2d" )
    res = GetObjectsWithinRange2DResponse()
    obj = aliased( ObjectInstance )
    geo = aliased( GeometryModel )
    print req.object_types, req.point, req.geometry_type, req.distance
    if req.geometry_type not in ["Position2D", "AxisAligned2D", "FootprintBox", "FootprintHull"]:
        rospy.logerr("SEMAP DB SRVs: get_objects_within_range2d was called with %s which is not a valid 2D geometry type" % req.geometry_type)
    else:
        rospy.loginfo("SEMAP DB SRVs: get_objects_within_range2d tries to find em")
        if req.object_types:
            obj_ids = any_obj_types_ids(obj, req.object_types)
        else:
            obj_ids = any_obj_ids(obj)
        # fully_within: the whole geometry must be inside the radius
        # (ST_DFullyWithin); otherwise any part in range suffices (ST_DWithin).
        if req.fully_within:
            ids = db().query( obj.id ).filter( obj.id.in_( obj_ids ), \
                obj.absolute_description_id == geo.abstraction_desc, geo.type == req.geometry_type, \
                ST_DFullyWithin(fromPoint2D(req.point),geo.geometry, req.distance) ).all()
        else:
            ids = db().query( obj.id ).filter( obj.id.in_( obj_ids ), \
                obj.absolute_description_id == geo.abstraction_desc, geo.type == req.geometry_type, \
                ST_DWithin(fromPoint2D(req.point), geo.geometry, req.distance) ).all()
        # NOTE(review): `ids` is only bound on this branch, so this assignment
        # must stay inside the else block; on the invalid-geometry branch the
        # response is returned with its default (empty) ids.
        res.ids = [id for id, in ids]
    return res
def get_objects_within_range( req ):
    # Finds objects within a 3D distance of a reference point and returns,
    # per object, min/max distances and connecting line endpoints.
    rospy.loginfo( "SEMAP DB SRVs: get_objects_within_range" )
    res = GetObjectsWithinRangeResponse()
    tar_obj = aliased( ObjectInstance )
    tar_geo = aliased( GeometryModel )
    if req.target_object_types:
        tar_ids = any_obj_types_ids(tar_obj, req.target_object_types)
    else:
        tar_ids = any_obj_ids(tar_obj)
    # fully_within: entire geometry inside the range; otherwise any part.
    if req.fully_within:
        operator = ST_3DDFullyWithin(fromPoint3D(req.reference_point), tar_geo.geometry, req.distance)
    else:
        operator = ST_3DDWithin(fromPoint3D(req.reference_point), tar_geo.geometry, req.distance)
    # Per matching object: id, min distance, max distance, and the closest
    # point on its geometry; ordered nearest-first.
    results = db().query( tar_obj.id,
                          ST_3DDistance( fromPoint3D(req.reference_point), tar_geo.geometry),
                          ST_3DMaxDistance( fromPoint3D(req.reference_point), tar_geo.geometry),
                          ST_3DClosestPoint(tar_geo.geometry, fromPoint3D(req.reference_point)) ).\
        filter( tar_obj.id.in_( tar_ids ),
                tar_obj.absolute_description_id == tar_geo.abstraction_desc,
                tar_geo.type == req.target_object_geometry_type,
                operator).\
        order_by( ST_3DDistance( fromPoint3D(req.reference_point), tar_geo.geometry) ).all()
    for i, min, max, point in results:
        print i, min, max, point
        pair = ObjectPair()
        pair.reference_id = -1
        pair.target_id = i
        pair.max_dist = max
        pair.max_dist_line[0] = req.reference_point
        # NOTE(review): both line endpoints reuse `point`, the CLOSEST point —
        # the max_dist line presumably should end at the farthest point;
        # confirm intended behaviour.
        pair.max_dist_line[1] = toPoint3D( point )
        pair.min_dist = min
        pair.min_dist_line[0] = req.reference_point
        pair.min_dist_line[1] = toPoint3D( point )
        res.pairs.append(pair)
    return res
def get_objects_within_volume( req ):
    # Finds objects whose geometry lies within the given reference mesh
    # volume — fully (containment) or partially (containment OR
    # intersection) depending on req.fully_within.
    # BUG FIX: log tag said get_objects_within_area (copy/paste).
    rospy.loginfo( "SEMAP DB SRVs: get_objects_within_volume" )
    res = GetObjectsWithinVolumeResponse()
    tar_obj = aliased( ObjectInstance )
    tar_geo = aliased( GeometryModel )
    if req.target_object_types:
        tar_ids = any_obj_types_ids(tar_obj, req.target_object_types)
    else:
        tar_ids = any_obj_ids(tar_obj)
    if req.fully_within:
        operator = SFCGAL_Contains3D(fromPolygonMesh3D(req.reference_mesh), tar_geo.geometry)
    else:
        operator = or_( SFCGAL_Contains3D(fromPolygonMesh3D(req.reference_mesh), tar_geo.geometry),
                        SFCGAL_Intersects3D(fromPolygonMesh3D(req.reference_mesh), tar_geo.geometry) )
    results = db().query( tar_obj.id ).\
        filter( tar_obj.id.in_( tar_ids ),
                tar_obj.absolute_description_id == tar_geo.abstraction_desc,
                tar_geo.type == req.target_object_geometry_type,
                operator).all()
    for i in results:
        pair = ObjectPair()
        pair.reference_id = -1
        pair.target_id = i[0]
        pair.relations.append("contained-in-volume")
        res.pairs.append(pair)
    return res
# BUG FIX: removed a stray unterminated ''' that followed this function and
# would have swallowed subsequent code as a string / broken parsing.
def get_match(cls, **kwargs):
    """Fetch a match (filtered by **kwargs) and return it as a dict enriched
    with live scoring details (overs, extras, batting/bowling scores).

    Raises IndexError if no match satisfies the filters.
    """
    with scoped_session() as session:
        team1 = aliased(Team)
        team2 = aliased(Team)
        # BUG FIX: the second outer join previously matched cls.team1_name
        # against team2, joining team1's name twice and never binding team2;
        # join on cls.team2_name instead.
        query = session.query(cls).filter_by(**kwargs)\
            .outerjoin(team1, cls.team1_name == team1.name)\
            .outerjoin(team2, cls.team2_name == team2.name)
        match = query.all()[0]
        match_dict = match.as_dict()
        # Latest-ball snapshot drives the derived fields below.
        latest_ball_res = Ball.get_latest_ball(session, match.id)
        latest_ball = latest_ball_res['latest_ball']
        bowler_id = latest_ball_res['bowler_id']
        total_runs = latest_ball_res['total_runs']
        runs = latest_ball_res['runs']
        is_wide = latest_ball_res['is_wide']
        is_no_ball = latest_ball_res['is_no_ball']
        total_wides = Ball.get_total_wides(session, match.id)
        total_no_balls = Ball.get_total_no_balls(session, match.id)
        total_byes = Ball.get_total_byes(session, match.id)
        # NOTE(review): latest_ball % 6 < 6 is always true, so the '' branch
        # is dead — the intended "over finished" condition needs confirming;
        # behaviour kept as-is.
        match_dict['current_bowler'] = bowler_id if latest_ball%6 < 6 else ''
        match_dict['over'] = get_over(latest_ball, is_wide, is_no_ball)
        # Extras (wides/no-balls/byes) are added on top of runs off the bat.
        match_dict['total_runs'] = total_runs + total_wides + total_no_balls + total_byes
        match_dict['wides'] = total_wides
        match_dict['no_balls'] = total_no_balls
        match_dict['byes'] = total_byes
        match_dict['last_ball'] = runs
        match_dict['next_ball'] = next_ball(latest_ball, is_wide, is_no_ball)
        match_dict['batting_score'] = get_batting_score(session, match)
        match_dict['bowling_score'] = get_bowling_score(session, match)
        return match_dict
def removeDupes(self):
    """Remove Duplicate Items.

    Finds pairs of Data rows with identical (datasource_key, node_key,
    timestamp), deletes the higher-keyed row of each pair, and commits.
    """
    print "Removing Dupes"
    from sqlalchemy.orm import aliased
    session = self.Session()
    a1 = aliased(Data)
    a2 = aliased(Data)
    # Self-join: every pair of rows sharing source, node and timestamp.
    query = session.query(a1,a2)
    query = query.filter(a1.datasource_key == a2.datasource_key) #Data sources should be unique
    query = query.filter(a1.node_key == a2.node_key)
    query = query.filter(a1.timestamp == a2.timestamp)
    # data_key < data_key keeps exactly one ordering per pair, so the
    # lower-keyed row survives and item[1] is always the one to delete.
    query = query.filter(a1.data_key < a2.data_key)
    #query = query.limit(10)
    log.debug("Count of Items {0}".format(query.count()))
    #print query.limit(10).all()
    keyList = [item[1].data_key for item in query.all()]
    #log.debug(keyList)
    theQry = session.query(Data).filter(Data.data_key.in_(keyList))
    # Bulk delete; False disables session synchronisation, fine since the
    # session is committed (and thus expired) right after.
    delItems = theQry.delete(False)
    log.debug("Total of {0} items deleted".format(delItems))
    session.commit()
    # for item in query.limit(10):
    #log.debug("{0} == {1}".format(item[0],item[1]))
    return
def query_index():
    """Build the dataset listing for the current user.

    Applies any language/territory filters from the request, computes facet
    counts for both dimensions, and wraps the query in a pager.

    Returns:
        (pager, languages, territories) where the facet lists hold dicts
        with 'code', 'count' and 'label' keys.
    """
    q = Dataset.all_by_account(current_user, order=False)
    q = q.order_by(Dataset.updated_at.desc())

    # Narrow the query by every requested language code; a fresh alias per
    # code allows several languages to be required at once.
    for code in request.args.getlist('languages'):
        lang = aliased(DatasetLanguage)
        q = q.join(lang, Dataset._languages).filter(lang.code == code)

    # Same treatment for territory codes.
    for code in request.args.getlist('territories'):
        terr = aliased(DatasetTerritory)
        q = q.join(terr, Dataset._territories).filter(terr.code == code)

    # Facet counts over the filtered query, decorated with display labels.
    languages = [
        {'code': code, 'count': count, 'label': LANGUAGES.get(code)}
        for (code, count) in DatasetLanguage.dataset_counts(q)
    ]
    territories = [
        {'code': code, 'count': count, 'label': COUNTRIES.get(code)}
        for (code, count) in DatasetTerritory.dataset_counts(q)
    ]

    pager = Pager(q, limit=15)
    return pager, languages, territories
def resolve_duplicates(model, attr, separator=u"-"): """Resolve duplicates on a model property Check and remove by renaming duplicate attribute for values. Args: model: model that will be checked attr: attribute that will be checked separator: (default -) Separator between old attr value and integer """ # pylint: disable=invalid-name v0, v1 = aliased(model, name="v0"), aliased(model, name="v1") query = db.session.query(v0).join(v1, and_( getattr(v0, attr) == getattr(v1, attr), v0.id > v1.id )) for v in query: i = 1 nattr = "{}{}{}".format(getattr(v, attr, model.type), separator, i) while db.session.query(model).\ filter(getattr(model, attr) == nattr).count(): i += 1 nattr = "{}{}{}".format(getattr(v, attr, model.type), separator, i) setattr(v, attr, nattr) db.session.add(v) db.session.flush() db.session.commit()
def _tenant_networks_by_network_query(self, context, network_id,
                                      bgp_speaker_id):
    """Return subquery for tenant networks by binding network ID.

    Selects (router_id, cidr, ip_version, address_scope id) for subnets on
    tenant-facing router ports, excluding the given network itself, scoped
    to the given BGP speaker's address-scope ip_version.
    """
    address_scope = aliased(address_scope_db.AddressScope,
                            name='address_scope')
    router_attrs = aliased(l3_attrs_db.RouterExtraAttributes,
                           name='router_attrs')
    tenant_networks_query = context.session.query(
        l3_db.RouterPort.router_id,
        models_v2.Subnet.cidr,
        models_v2.Subnet.ip_version,
        address_scope.id)
    tenant_networks_query = tenant_networks_query.filter(
        # Skip gateway and SNAT ports: only tenant-facing ports count.
        l3_db.RouterPort.port_type != lib_consts.DEVICE_OWNER_ROUTER_GW,
        l3_db.RouterPort.port_type != lib_consts.DEVICE_OWNER_ROUTER_SNAT,
        l3_db.RouterPort.router_id == router_attrs.router_id,
        models_v2.IPAllocation.port_id == l3_db.RouterPort.port_id,
        models_v2.IPAllocation.subnet_id == models_v2.Subnet.id,
        # The binding network itself is excluded from the result.
        models_v2.Subnet.network_id != network_id,
        models_v2.Subnet.subnetpool_id == models_v2.SubnetPool.id,
        models_v2.SubnetPool.address_scope_id == address_scope.id,
        BgpSpeaker.id == bgp_speaker_id,
        # Only subnets whose address scope matches the speaker's IP version.
        BgpSpeaker.ip_version == address_scope.ip_version,
        models_v2.Subnet.ip_version == address_scope.ip_version)
    return tenant_networks_query
def export_aliases(project, fh):
    """
    Dump a list of all entity names to a CSV file. The table will contain
    the active name of each entity, and one of the other existing names as
    an alias.
    """
    writer = DictWriter(fh, ['entity_id', 'alias', 'canonical'])
    writer.writeheader()
    # Two views of EntityProperty: any 'name' property (alias) vs the active
    # 'name' property (canonical) of the same entity.
    alias = aliased(EntityProperty)
    canonical = aliased(EntityProperty)
    q = db.session.query(alias.value_string.label('alias'), alias.entity_id)
    q = q.join(Entity)
    q = q.join(canonical)
    q = q.filter(Entity.project_id==project.id)
    q = q.filter(alias.entity_id!=None)
    q = q.filter(alias.name=='name')
    q = q.filter(canonical.name=='name')
    q = q.filter(canonical.active==True)
    q = q.add_columns(canonical.value_string.label('canonical'))
    for row in q.all():
        # NOTE: rows where alias equals the canonical name are intentionally
        # kept (the skip below was disabled).
        #if row.alias == row.canonical:
        #    continue
        writer.writerow({
            'entity_id': str(row.entity_id),
            'alias': row.alias,
            'canonical': row.canonical
        })
def __correspondence_query__(self, chain1, chain2):
    """Create a query for correspondences between the two chains.

    This only checks in the given direction.

    Parameters
    ----------
    chain1 : int
        The first chain id.
    chain2 : int
        The second chain id.

    Returns
    -------
    corr_id : int
        The correspondence id if there is an alignment between the two
        chains, otherwise None.
    """
    with self.session() as session:
        info = mod.CorrespondenceInfo
        # One mapping alias per side of the correspondence.
        map1 = aliased(mod.ExpSeqChainMapping)
        map2 = aliased(mod.ExpSeqChainMapping)
        query = (
            session.query(info.correspondence_id)
            .join(map1, map1.exp_seq_id == info.exp_seq_id_1)
            .join(map2, map2.exp_seq_id == info.exp_seq_id_2)
            .filter(map1.chain_id == chain1)
            .filter(map2.chain_id == chain2)
            .filter(info.good_alignment == 1)
        )
        row = query.first()
        # No row means no good alignment in this direction.
        return row.correspondence_id if row else None
def get(self):
    """Report revenue for a date, split by payment type.

    Query args:
        date: YYYYMMDD, defaults to today.
        type: income type — card/cash/total, defaults to total.

    Returns a JSON body {success, total}.
    """
    i = request.args
    today = datetime.datetime.now().strftime('%Y%m%d')
    date = i.get('date', today).strip()
    type_ = i.get('type', 'total').strip()  # income type: card/cash/total
    if type_ in ('card', 'total'):
        # Today's transactions live in Trans; earlier days in HistoryTrans.
        if date == today:
            TransTable = aliased(Trans)
        else:
            TransTable = aliased(HistoryTrans)
        # BUG FIX: the sum previously targeted HistoryTrans.amount while all
        # filters used the TransTable alias — for today's data that produced
        # an unconstrained cross join and a wrong total.  Sum over the alias.
        q_card = db_session.query(func.sum(TransTable.amount).label('amount')) \
            .filter(TransTable.trans_date == date) \
            .filter(TransTable.trans_code == '000010') \
            .filter(TransTable.status == '0')
        # Scope the card total to the caller's unit/shop when applicable.
        if session['user_level'] == 'unit':
            q_card = q_card.filter(TransTable.unit_no == session['unit_no'])
        if session['user_level'] == 'shop':
            q_card = q_card.filter(TransTable.shop_no == session['shop_no'])
    if type_ in ('cash', 'total'):
        # NOTE(review): pay_time pinned to '20000000' looks like a
        # placeholder — confirm it should not be the requested date.
        q_cash = db_session.query(func.sum(SaleOrderInfo.cash_pay_amount).label('amount')) \
            .filter(SaleOrderInfo.is_paid == True) \
            .filter(SaleOrderInfo.pay_time == '20000000')
    if type_ == 'card':
        return jsonify(success=True, total=q_card.one().amount)
    if type_ == 'cash':
        return jsonify(success=True, total=q_cash.one().amount)
    # BUG FIX: the 'total' type previously fell through and returned None;
    # combine both sums (SUM() yields None for empty sets, hence `or 0`).
    if type_ == 'total':
        card_amount = q_card.one().amount or 0
        cash_amount = q_cash.one().amount or 0
        return jsonify(success=True, total=card_amount + cash_amount)
def create_new_repack_notifications(date_from=None, date_to=None, user_notifications=None):
    # Notifies channel owners ("packers") whenever one of their videos is
    # re-packed into someone else's public, non-favourite channel within the
    # optional [date_from, date_to) window.
    packer_channel = aliased(Channel, name="source_channel")
    packer_user = aliased(User, name="packer_user")
    repacker_channel = aliased(Channel, name="repacker_channel")
    repacker_user = aliased(User, name="repacker_user")
    activity_window = readonly_session.query(VideoInstance, packer_channel, repacker_channel, repacker_user).join(
        packer_channel,
        packer_channel.id == VideoInstance.source_channel
    ).join(
        packer_user,
        packer_user.id == packer_channel.owner
    ).join(
        repacker_channel,
        # Only repacks into public, non-favourite channels count.
        (repacker_channel.id == VideoInstance.channel) &
        (repacker_channel.favourite == False) &
        (repacker_channel.public == True)
    ).join(
        repacker_user,
        repacker_user.id == repacker_channel.owner
    )
    if date_from:
        activity_window = activity_window.filter(VideoInstance.date_added >= date_from)
    if date_to:
        activity_window = activity_window.filter(VideoInstance.date_added < date_to)
    # NOTE: the loop rebinds packer_channel/repacker_channel from alias
    # objects to row entities — harmless, the aliases are no longer needed
    # once the query is built.
    for video_instance, packer_channel, repacker_channel, repacker in activity_window:
        user, type, body = repack_message(repacker, repacker_channel, video_instance)
        _add_user_notification(packer_channel.owner, video_instance.date_added, type, body)
        # Also record a pending push notification when the type is pushable.
        if user_notifications is not None and type in app.config['PUSH_NOTIFICATION_MAP']:
            user_notifications.setdefault(packer_channel.owner, None)
def apifriends():
    """Return the caller's confirmed friends (greetings with is_friend set)
    newer than the optional `lastid` cursor, as a JSON item list."""
    # this is for dev
    src_user_id = set_src_user_id()
    # dev end
    try:
        lastid = int(request.args.get('lastid', 0))
    except (TypeError, ValueError):
        # Narrowed from a bare except: only conversion failures mean the
        # parameter is bad.
        raise InvalidParam('invalid lastid')
    # (Removed two unused aliased(Greeting) locals — dead code.)
    # Greetings from this user that became friendships, joined with the
    # friend's account for uid/provider.
    friends = db.session.query(Greeting.id, Greeting.dst_user_id, Account.uid, Account.provider, Greeting.created_at)\
        .join(Account, Account.user_id == Greeting.dst_user_id)\
        .filter(Greeting.src_user_id == src_user_id)\
        .filter(Greeting.is_friend == True)\
        .filter(Greeting.id > lastid).all()
    return jsonify({
        "status": "success",
        "data": {
            "items": [dict(zip(['id', 'user_id', 'uid', 'provider', 'created_at'],
                               [id, user_id, uid, provider, totimestamp(created_at)]))
                      for id, user_id, uid, provider, created_at in friends]
        }
    })
def ak_jurnal_skpd_item_act(self):
    # DataTables grid endpoint for the items of one SKPD journal entry:
    # returns journal item rows joined with their SAP account, rekening
    # (account code) and sub-activity descriptions.
    ses = self.request.session
    req = self.request
    params = req.params
    url_dict = req.matchdict
    # id param defaults to 0 when absent or empty.
    pk_id = 'id' in params and params['id'] and int(params['id']) or 0
    if url_dict['act']=='grid':
        # Non-numeric route segment falls back to 0 (matches nothing).
        ak_jurnal_id = url_dict['ak_jurnal_id'].isdigit() and url_dict['ak_jurnal_id'] or 0
        # Column order must match the labeled columns of the query below.
        columns = []
        columns.append(ColumnDT('id'))
        columns.append(ColumnDT('sapkd'))
        columns.append(ColumnDT('sapnm'))
        columns.append(ColumnDT('amount', filter=self._number_format))
        columns.append(ColumnDT('notes'))
        columns.append(ColumnDT('rekkd'))
        columns.append(ColumnDT('reknm'))
        columns.append(ColumnDT('kegiatan_sub_id'))
        columns.append(ColumnDT('rekening_id'))
        columns.append(ColumnDT('ak_jurnal_id'))
        columns.append(ColumnDT('subkd'))
        columns.append(ColumnDT('subnm'))
        rek = aliased(Rekening)
        sap = aliased(Sap)
        sub = aliased(KegiatanSub)
        query = DBSession.query(AkJurnalItem.id,
                                sap.kode.label('sapkd'),
                                sap.nama.label('sapnm'),
                                AkJurnalItem.amount,
                                AkJurnalItem.notes,
                                rek.kode.label('rekkd'),
                                rek.nama.label('reknm'),
                                AkJurnalItem.kegiatan_sub_id,
                                AkJurnalItem.rekening_id,
                                AkJurnalItem.ak_jurnal_id,
                                sub.kode.label('subkd'),
                                sub.nama.label('subnm'),
                  ).join(AkJurnal,
                  ).outerjoin(rek, AkJurnalItem.rekening_id == rek.id
                  ).outerjoin(sap, AkJurnalItem.sap_id == sap.id
                  ).outerjoin(sub, AkJurnalItem.kegiatan_sub_id == sub.id
                  ).filter(AkJurnalItem.ak_jurnal_id==ak_jurnal_id,
                           AkJurnalItem.ak_jurnal_id==AkJurnal.id,
                  # NOTE(review): calling .label() inside group_by is
                  # redundant (the underlying columns are grouped) but
                  # harmless; kept byte-identical.
                  ).group_by(AkJurnalItem.id,
                             sap.kode.label('sapkd'),
                             sap.nama.label('sapnm'),
                             AkJurnalItem.amount,
                             AkJurnalItem.notes,
                             rek.kode.label('rekkd'),
                             rek.nama.label('reknm'),
                             AkJurnalItem.kegiatan_sub_id,
                             AkJurnalItem.rekening_id,
                             AkJurnalItem.ak_jurnal_id,
                             sub.kode.label('subkd'),
                             sub.nama.label('subnm'),
                  )
        rowTable = DataTables(req, AkJurnalItem, query, columns)
        return rowTable.output_result()
def search(self, phrase, content=False): """Perform the search on the index""" # we need to adjust the phrase to be a set of OR per word phrase = " OR ".join(phrase.split()) results = set() desc = Bmark.query.filter(Bmark.description.match(phrase)) tag_str = Bmark.query.filter(Bmark.tag_str.match(phrase)) ext = Bmark.query.filter(Bmark.extended.match(phrase)) results.update(set([bmark for bmark in desc.union(tag_str, ext).order_by(Bmark.stored).all()])) readable_res = [] if content: content = Readable.query.filter(Readable.content.match(phrase)) hashed = aliased(Hashed) qry = content.outerjoin((hashed, Readable.hashed)).options(contains_eager(Readable.hashed, alias=hashed)) bmarks = aliased(Bmark) qry = qry.outerjoin((bmarks, hashed.bmark)).options( contains_eager(Readable.hashed, hashed.bmark, alias=bmarks) ) res = qry.order_by(bmarks.stored).all() for read in res: readable_res.append(read.hashed.bmark[0]) results.update(set(readable_res)) return sorted(list(results), key=lambda res: res.stored, reverse=True)
def _get_context_relationships():
    """Load list of objects related on contexts and objects types.

    This code handles the case when user is added as `Auditor` and should be
    able to see objects mapped to the `Program` on `My Work` page.

    Returns:
      objects (list((id, type, None))): Related objects
    """
    # NOTE: contact_id and model_names are free variables — this helper is
    # expected to be nested inside a function that defines them.
    # Contexts in which the user holds the Auditor role.
    user_role_query = db.session.query(UserRole.context_id).join(
        Role, UserRole.role_id == Role.id).filter(and_(
            UserRole.person_id == contact_id, Role.name == 'Auditor')
    )
    _ct = aliased(all_models.Context, name="c")
    _rl = aliased(all_models.Relationship, name="rl")
    # Union of both directions of the relationship: rows where the context's
    # object is the destination, plus rows where it is the source.
    context_query = db.session.query(
        _rl.source_id.label('id'),
        _rl.source_type.label('type'),
        literal(None)).join(_ct, and_(
            _ct.id.in_(user_role_query),
            _rl.destination_id == _ct.related_object_id,
            _rl.destination_type == _ct.related_object_type,
            _rl.source_type.in_(model_names),
        )).union(db.session.query(
            _rl.destination_id.label('id'),
            _rl.destination_type.label('type'),
            literal(None)).join(_ct, and_(
                _ct.id.in_(user_role_query),
                _rl.source_id == _ct.related_object_id,
                _rl.source_type == _ct.related_object_type,
                _rl.destination_type.in_(model_names),)))
    return context_query
def context_relationship_query(contexts):
    """Load a list of objects related to the given contexts

    Args:
        contexts (list(int)): A list of context ids
    Returns:
        objects (list((id, type, None))): Related objects
    """
    if not len(contexts):
        return []
    _context = aliased(all_models.Context, name="c")
    _relationship = aliased(all_models.Relationship, name="rl")
    # Pick the "other side" of each relationship row: when the destination is
    # the context's object, take the source id/type, and vice versa.
    headers = (case([
        (_relationship.destination_type == _context.related_object_type,
         _relationship.source_id.label('id'))
    ], else_=_relationship.destination_id.label('id')),
        case([
            (_relationship.destination_type == _context.related_object_type,
             _relationship.source_type.label('type'))
        ], else_=_relationship.destination_type.label('type')),
        literal(None))
    # Union over both join directions so relationships are found regardless
    # of which side references the context's object.
    return db.session.query(*headers).join(_context, and_(
        _context.id.in_(contexts),
        _relationship.destination_id == _context.related_object_id,
        _relationship.destination_type == _context.related_object_type,
    )).union(db.session.query(*headers).join(_context, and_(
        _context.id.in_(contexts),
        _relationship.source_id == _context.related_object_id,
        _relationship.source_type == _context.related_object_type,
    ))).all()
def _get_central_fip_host_routes_by_router(self, context, router_id,
                                           bgp_speaker_id):
    """Get floating IP host routes with the given router as nexthop.

    Returns (floating_ip_address, next_hop ip_address) tuples, restricted
    to IPv4, to centralized (non-distributed) routers, and to networks
    bound to the given BGP speaker with host-route advertisement enabled.
    """
    with context.session.begin(subtransactions=True):
        dest_alias = aliased(l3_db.FloatingIP,
                             name='destination')
        next_hop_alias = aliased(models_v2.IPAllocation,
                                 name='next_hop')
        binding_alias = aliased(BgpSpeakerNetworkBinding,
                                name='binding')
        router_attrs = aliased(l3_attrs_db.RouterExtraAttributes,
                               name='router_attrs')
        query = context.session.query(dest_alias.floating_ip_address,
                                      next_hop_alias.ip_address)
        # Next hop is an allocation on the floating IP's external network.
        query = query.join(
            next_hop_alias,
            next_hop_alias.network_id == dest_alias.floating_network_id)
        query = query.join(l3_db.Router,
                           dest_alias.router_id == l3_db.Router.id)
        query = query.filter(
            l3_db.Router.id == router_id,
            dest_alias.router_id == l3_db.Router.id,
            # NOTE(review): router_attrs is implicitly inner-joined here and
            # also outer-joined below with an overlapping `distributed`
            # check -- looks redundant; confirm which form is intended.
            l3_db.Router.id == router_attrs.router_id,
            router_attrs.distributed == sa.sql.false(),
            # The next hop must be the router's gateway port address.
            l3_db.Router.gw_port_id == next_hop_alias.port_id,
            next_hop_alias.subnet_id == models_v2.Subnet.id,
            models_v2.Subnet.ip_version == 4,
            binding_alias.network_id == models_v2.Subnet.network_id,
            binding_alias.bgp_speaker_id == bgp_speaker_id,
            binding_alias.ip_version == 4,
            BgpSpeaker.advertise_floating_ip_host_routes == sa.sql.true())
        query = query.outerjoin(router_attrs,
                                l3_db.Router.id == router_attrs.router_id)
        query = query.filter(router_attrs.distributed != sa.sql.true())
        return self._host_route_list_from_tuples(query.all())
def test_aliased_value(self):
    """Comparing an aliased hybrid renders the upper() wrapper on both sides."""
    mapped = self._fixture()
    expr = aliased(mapped).value == 5
    eq_(str(expr), "upper(a_1.value) = upper(:upper_1)")
def schedule_queued_recipes(*args):
    # Schedule eligible queued machine recipes onto available systems.
    #
    # NOTE(review): this block uses Python 2 `except Exc, e` syntax and will
    # not compile on Python 3.
    # NOTE(review): the handler for the outer `try:` below appears to be
    # truncated in this chunk -- the matching except/finally is not visible.
    session.begin()
    try:
        # This query returns a queued host recipe and the guest which has
        # the most recent distro tree. It is to be used as a derived table.
        latest_guest_distro = select([machine_guest_map.c.machine_recipe_id.label('host_id'),
                                      func.max(DistroTree.date_created).label('latest_distro_date')],
                                     from_obj=[machine_guest_map.join(GuestRecipe.__table__,
                                                                      machine_guest_map.c.guest_recipe_id==GuestRecipe.__table__.c.id). \
                                               join(Recipe.__table__).join(DistroTree.__table__)],
                                     whereclause=Recipe.status=='Queued',
                                     group_by=machine_guest_map.c.machine_recipe_id).alias()

        hosts_lab_controller_distro_map = aliased(LabControllerDistroTree)
        hosts_distro_tree = aliased(DistroTree)
        guest_recipe = aliased(Recipe)
        guests_distro_tree = aliased(DistroTree)
        guests_lab_controller = aliased(LabController)

        # This query will return queued recipes that are eligible to be scheduled.
        # They are determined to be eligible if:
        # * They are clean
        # * There are systems available (see the filter criteria) in lab controllers where
        #   the recipe's distro tree is available.
        # * If it is a host recipe, the most recently created distro of all
        #   the guest recipe's distros is available in at least one of the same
        #   lab controllers as that of the host's distro tree.
        #
        # Also note that we do not try to handle the situation where the guest and host never
        # have a common labcontroller. In that situation the host and guest would stay queued
        # until that situation was rectified.
        recipes = MachineRecipe.query\
            .join(Recipe.recipeset, RecipeSet.job)\
            .filter(Job.dirty_version == Job.clean_version)\
            .outerjoin((guest_recipe, MachineRecipe.guests))\
            .outerjoin((guests_distro_tree,
                        guest_recipe.distro_tree_id == guests_distro_tree.id))\
            .outerjoin((latest_guest_distro,
                        and_(latest_guest_distro.c.host_id == MachineRecipe.id,
                             latest_guest_distro.c.latest_distro_date == \
                             guests_distro_tree.date_created)))\
            .outerjoin(guests_distro_tree.lab_controller_assocs, guests_lab_controller)\
            .join(Recipe.systems)\
            .join((hosts_distro_tree, hosts_distro_tree.id == MachineRecipe.distro_tree_id))\
            .join((hosts_lab_controller_distro_map, hosts_distro_tree.lab_controller_assocs),
                  (LabController, and_(
                      hosts_lab_controller_distro_map.lab_controller_id == LabController.id,
                      System.lab_controller_id == LabController.id)))\
            .filter(
                and_(Recipe.status == TaskStatus.queued,
                     System.user == None,
                     LabController.disabled == False,
                     or_(
                         RecipeSet.lab_controller == None,
                         RecipeSet.lab_controller_id == System.lab_controller_id,
                     ),
                     or_(
                         System.loan_id == None,
                         System.loan_id == Job.owner_id,
                     ),
                     or_(
                         # We either have no guest
                         guest_recipe.id == None,
                         # Or we have a guest of which the latest
                         # is in a common lab controller.
                         and_(guests_lab_controller.id == LabController.id,
                              latest_guest_distro.c.latest_distro_date != None
                              ),
                     )  # or
                )  # and
            )
        # Get out of here if we have no recipes
        if not recipes.count():
            return False
        # This should be the guest recipe with the latest distro.
        # We return it in this query, to save us from re-running the
        # derived table query in schedule_queued_recipe()
        recipes = recipes.add_column(guest_recipe.id)
        # Effective priority is given in the following order:
        # * Multi host recipes with already scheduled siblings
        # * Priority level (i.e Normal, High etc)
        # * RecipeSet id
        # * Recipe id
        recipes = recipes.order_by(RecipeSet.lab_controller == None). \
            order_by(RecipeSet.priority.desc()). \
            order_by(RecipeSet.id). \
            order_by(MachineRecipe.id)
        # Don't do a GROUP BY before here, it is not needed.
        recipes = recipes.group_by(MachineRecipe.id)
        log.debug("Entering schedule_queued_recipes")
        for recipe_id, guest_recipe_id in recipes.values(
                MachineRecipe.id,
                guest_recipe.id):
            # Each recipe is scheduled in its own nested transaction so one
            # failure does not roll back the whole batch.
            session.begin(nested=True)
            try:
                schedule_queued_recipe(recipe_id, guest_recipe_id)
                session.commit()
            except (StaleSystemUserException, InsufficientSystemPermissions,
                    StaleTaskStatusException), e:
                # Either
                # System user has changed before
                # system allocation
                # or
                # System permissions have changed before
                # system allocation
                # or
                # Something has moved our status on from queued
                # already.
                log.warn(str(e))
                session.rollback()
            except Exception, e:
                log.exception('Error in schedule_queued_recipe(%s)', recipe_id)
                session.rollback()
                # Abort the recipe set in a fresh nested transaction; the
                # previous one was already rolled back.
                session.begin(nested=True)
                try:
                    recipe = MachineRecipe.by_id(recipe_id)
                    recipe.recipeset.abort(
                        u"Aborted in schedule_queued_recipe: %s" % e)
                    session.commit()
                except Exception, e:
                    log.exception(
                        "Error during error handling in schedule_queued_recipe: %s" % e)
                    session.rollback()
def go():
    # Inspect a freshly built alias of A, then traverse the mapper's
    # path registry through bs -> alias -> bar.
    alias_insp = sa.inspect(aliased(A))
    m1._path_registry[m1.attrs.bs][alias_insp][m1.attrs.bar]
def go():
    # Create and inspect a new alias of User in one expression.
    inspect(aliased(User))
def go():
    # Inspect a fresh alias of User and index its path registry by the
    # addresses relationship property.
    user_alias = aliased(User)
    inspect(user_alias)._path_registry[User.addresses.property]
def test_any_two(self):
    """any() over an aliased Company's engineers matches company c2."""
    session = Session()
    company_alias = aliased(Company)
    cobol_criterion = company_alias.employees.of_type(Engineer).any(
        Engineer.primary_language == 'cobol')
    result = session.query(company_alias).filter(cobol_criterion).one()
    eq_(result, self.c2)
def _query_db(self, mapping):
    """Build a query to retrieve data from the local db.

    Includes columns from the mapping as well as joining to the id tables
    to get real SF ids for lookups.

    Args:
        mapping (dict): a mapping step definition (table, fields, lookups,
            optional filters / record_type keys).
    Returns:
        A SQLAlchemy query over the mapped model's columns plus the
        resolved SF ids for each lookup.
    """
    model = self.models[mapping.get("table")]

    # Use primary key instead of the field mapped to SF Id
    fields = mapping.get("fields", {}).copy()
    if mapping["oid_as_pk"]:
        del fields["Id"]

    id_column = model.__table__.primary_key.columns.keys()[0]
    columns = [getattr(model, id_column)]

    # RecordTypeId is resolved via the rt mapping tables below, not
    # selected directly.
    for name, f in fields.items():
        if name != "RecordTypeId":
            columns.append(model.__table__.columns[f])

    # Only lookups without "after" are resolvable in this pass.
    lookups = {
        lookup_field: lookup
        for lookup_field, lookup in mapping.get("lookups", {}).items()
        if "after" not in lookup
    }
    for lookup in lookups.values():
        # Alias so the same sf_ids table can be joined once per lookup.
        lookup["aliased_table"] = aliased(
            self.metadata.tables[f"{lookup['table']}_sf_ids"])
        columns.append(lookup["aliased_table"].columns.sf_id)

    if "RecordTypeId" in mapping["fields"]:
        rt_dest_table = self.metadata.tables[mapping["sf_object"] +
                                             "_rt_target_mapping"]
        columns.append(rt_dest_table.columns.record_type_id)

    query = self.session.query(*columns)
    if "record_type" in mapping and hasattr(model, "record_type"):
        query = query.filter(model.record_type == mapping["record_type"])
    if "filters" in mapping:
        filter_args = []
        for f in mapping["filters"]:
            filter_args.append(text(f))
        query = query.filter(*filter_args)

    if "RecordTypeId" in mapping["fields"]:
        # Map local record type ids to target-org ids via developer_name.
        rt_source_table = self.metadata.tables[mapping["sf_object"] +
                                               "_rt_mapping"]
        rt_dest_table = self.metadata.tables[mapping["sf_object"] +
                                             "_rt_target_mapping"]
        query = query.outerjoin(
            rt_source_table,
            rt_source_table.columns.record_type_id == getattr(
                model, mapping["fields"]["RecordTypeId"]),
        )
        query = query.outerjoin(
            rt_dest_table,
            rt_dest_table.columns.developer_name ==
            rt_source_table.columns.developer_name,
        )

    for sf_field, lookup in lookups.items():
        # Outer join with lookup ids table:
        # returns main obj even if lookup is null
        key_field = get_lookup_key_field(lookup, sf_field)
        value_column = getattr(model, key_field)
        query = query.outerjoin(
            lookup["aliased_table"],
            lookup["aliased_table"].columns.id == value_column,
        )
        # Order by foreign key to minimize lock contention
        # by trying to keep lookup targets in the same batch
        lookup_column = getattr(model, key_field)
        query = query.order_by(lookup_column)

    self.logger.debug(str(query))
    return query
def test_aliased_expression(self):
    """The hybrid's clause element compiles against the alias name a_1."""
    mapped_cls = self._fixture()
    clause = aliased(mapped_cls).value.__clause_element__()
    self.assert_compile(clause, "foo(a_1.value) + bar(a_1.value)")
def get_measures(self):
    """Compare measures between two programs for one survey.

    Returns a list of (measure_a, measure_b) pairs:
    - (MA, MB): the measure exists in both programs but was modified or
      relocated;
    - (MA, None): the measure was deleted (present in A only);
    - (None, MB): the measure was added (present in B only).
    """
    QA = model.QuestionNode
    QB = aliased(model.QuestionNode, name='qnode_b')
    MA = model.Measure
    MB = aliased(model.Measure, name='measure_b')
    QMA = model.QnodeMeasure
    QMB = aliased(model.QnodeMeasure, name='qnode_measure_b')

    start = perf()

    # Find modified / relocated measures
    measure_mod_query = (
        self.session.query(MA, MB)
        # Same measure id in both programs.
        .join(MB, MA.id == MB.id)
        .join(QMA,
              (QMA.program_id == MA.program_id) &
              (QMA.measure_id == MA.id))
        .join(QMB,
              (QMB.program_id == MB.program_id) &
              (QMB.measure_id == MB.id))
        .join(QA,
              (QMA.program_id == QA.program_id) &
              (QMA.qnode_id == QA.id))
        .join(QB,
              (QMB.program_id == QB.program_id) &
              (QMB.qnode_id == QB.id))

        # Basic survey membership
        .filter(QA.program_id == self.program_id_a,
                QB.program_id == self.program_id_b,
                QA.survey_id == self.survey_id,
                QB.survey_id == self.survey_id)

        # Filter for modified objects
        .filter((MA.title != MB.title) |
                (MA.description != MB.description) |
                (MA.response_type_id != MB.response_type_id) |
                (MA.weight != MB.weight) |
                (QMA.qnode_id != QMB.qnode_id) |
                (QMA.seq != QMB.seq)))

    # Find deleted measures
    measure_del_query = (
        self.session.query(MA, literal(None))
        .select_from(MA)
        .join(QMA,
              (QMA.program_id == MA.program_id) &
              (QMA.measure_id == MA.id))
        .join(QA,
              (QMA.program_id == QA.program_id) &
              (QMA.qnode_id == QA.id))
        .filter(QA.program_id == self.program_id_a,
                QA.survey_id == self.survey_id,
                # Present in A's survey but absent from B's.
                ~QMA.measure_id.in_(
                    self.session.query(QMB.measure_id)
                    .join(QB,
                          (QMB.program_id == QB.program_id) &
                          (QMB.qnode_id == QB.id))
                    .filter(QB.program_id == self.program_id_b,
                            QB.survey_id == self.survey_id))))

    # Find added measures
    measure_add_query = (
        self.session.query(literal(None), MB)
        .select_from(MB)
        .join(QMB,
              (QMB.program_id == MB.program_id) &
              (QMB.measure_id == MB.id))
        .join(QB,
              (QMB.program_id == QB.program_id) &
              (QMB.qnode_id == QB.id))
        .filter(QB.program_id == self.program_id_b,
                QB.survey_id == self.survey_id,
                # Present in B's survey but absent from A's.
                ~QMB.measure_id.in_(
                    self.session.query(QMA.measure_id)
                    .join(QA,
                          (QMA.program_id == QA.program_id) &
                          (QMA.qnode_id == QA.id))
                    .filter(QA.program_id == self.program_id_a,
                            QA.survey_id == self.survey_id))))

    measures = list(measure_mod_query.all() +
                    measure_add_query.all() +
                    measure_del_query.all())
    duration = perf() - start
    self.timing.append("Primary measure query took %gs" % duration)
    return measures
def test_aliased_expression(self):
    """Calling the hybrid on an alias compiles with alias-qualified args."""
    mapped_cls = self._fixture()
    expr = aliased(mapped_cls).value(5)
    self.assert_compile(expr, "foo(a_1.value, :foo_1) + :foo_2")
# querying for instance in session.query(User).order_by(User.id): print(instance.name, instance.fullname) for name, fullname in session.query(User.name, User.fullname): print(name, fullname) for row in session.query(User, User.name).all(): print(row.User, row.name) # label() SQL:AS for row in session.query(User.name.label('name_label')).all(): print(row.name_label) # aliased() user_alias = aliased(User, name='user_alias') for row in session.query(user_alias, user_alias.name).all(): print(row.user_alias) # SQL: LIMIT:2 OFFSET:1 for u in session.query(User).order_by(User.id)[1:3]: print(u) for name, in session.query(User.name).filter(User.fullname == 'Ed Jones'): print(name) for user in session.query(User).filter(User.name == 'ed').filter(User.fullname == 'Ed jones'): print(user) # common filter operators # equals
def run(self):
    """Main loop of the Atlas discovery thread.

    Starts a pool of atlasCrawler worker threads, then repeatedly reads
    discovery-enabled JDBC connections from the DBImport config database,
    feeds them to the crawler queue, and processes crawler results
    (updating the last-discovery timestamp or blacklisting the alias).
    """
    logger = "atlasDiscovery"
    log = logging.getLogger(logger)
    self.mysql_conn = None
    self.mysql_cursor = None
    self.configDBSession = None
    self.configDBEngine = None
    self.debugLogLevel = False
    atlasEnabled = True

    # self.atlasOperation = atlas_operations.atlasOperation(logger)

    if logging.root.level == 10:  # DEBUG
        self.debugLogLevel = True

    self.atlasCrawlerProcessQueue = Queue()
    self.atlasCrawlerResultQueue = Queue()
    self.jdbcConnectionMutex = threading.Lock()

    # Fetch configuration about MySQL database and how to connect to it
    self.configHostname = configuration.get("Database", "mysql_hostname")
    self.configPort = configuration.get("Database", "mysql_port")
    self.configDatabase = configuration.get("Database", "mysql_database")
    self.configUsername = configuration.get("Database", "mysql_username")
    self.configPassword = configuration.get("Database", "mysql_password")

    atlasCrawlerObjects = []
    atlasCrawlerThreads = int(configuration.get("Server", "atlas_threads"))

    if atlasCrawlerThreads == 0:
        log.info(
            "Atlas discovery disabled as the number of threads is set to 0"
        )
        atlasEnabled = False
    else:
        log.info("Starting %s Atlas crawler threads" % (atlasCrawlerThreads))

        # Spawn the daemonized crawler worker threads.
        for threadID in range(0, atlasCrawlerThreads):
            # if distCP_separate_logs == False:
            atlasCrawlerLogName = "atlasCrawler-thread%s" % (str(threadID))
            # else:
            #     distCPlogName = "distCP-thread%s"%(str(threadID))

            thread = atlasCrawler(
                name=str(threadID),
                atlasCrawlerProcessQueue=self.atlasCrawlerProcessQueue,
                atlasCrawlerResultQueue=self.atlasCrawlerResultQueue,
                threadStopEvent=self.threadStopEvent,
                loggerName=atlasCrawlerLogName,
                mutex=self.jdbcConnectionMutex)
            thread.daemon = True
            thread.start()
            atlasCrawlerObjects.append(thread)

    self.common_config = common_config.config()
    jdbcConnections = aliased(configSchema.jdbcConnections)
    self.failureLog = {}
    self.connectionsSentToCrawlers = []

    # The interval between the scans. This is in hours
    atlasDiscoveryInterval = self.common_config.getConfigValue(
        key="atlas_discovery_interval")

    # if atlasEnabled == True:
    #     atlasEnabled = self.atlasOperation.checkAtlasSchema(logger=logger)

    if atlasEnabled == True:
        log.info("atlasDiscovery started")
        log.info("Atlas discovery interval is set to %s hours" %
                 (atlasDiscoveryInterval))

    while not self.threadStopEvent.isSet() and atlasEnabled == True:
        # ****************************************************************
        # Read data from jdbc_connection and put in queue for processing
        # ****************************************************************

        if self.atlasCrawlerProcessQueue.qsize() < atlasCrawlerThreads:
            # Only read the database if there isn't enough items in the queue to the crawlers to processes. This will save
            # a large number of sql requests if the queue is full
            try:
                # Read a list of connection aliases that we are going to process in this iteration
                session = self.getDBImportSession()
                atlasDiscoveryCheckTime = datetime.utcnow() - timedelta(
                    hours=atlasDiscoveryInterval)

                # TODO: Probably only run this if jdbcConnectionsDf is empty
                # from the previous run
                jdbcConnectionsDf = pd.DataFrame(
                    session.query(
                        jdbcConnections.dbalias,
                        jdbcConnections.atlas_last_discovery,
                        jdbcConnections.atlas_discovery,
                        jdbcConnections.contact_info,
                        jdbcConnections.description,
                        jdbcConnections.owner,
                        jdbcConnections.atlas_include_filter,
                        jdbcConnections.atlas_exclude_filter).select_from(
                            jdbcConnections).filter(
                                jdbcConnections.atlas_discovery == 1)
                    # .filter((jdbcConnections.atlas_last_discovery < atlasDiscoveryCheckTime) | (jdbcConnections.atlas_last_discovery == None))
                    .order_by(jdbcConnections.atlas_last_discovery).all())
                session.close()
            except SQLAlchemyError as e:
                log.error(str(e.__dict__['orig']))
                session.rollback()
                self.disconnectDBImportDB()
            else:
                for index, row in jdbcConnectionsDf.iterrows():
                    dbAlias = row['dbalias']

                    # TODO: Move this into the crawler thread
                    if self.common_config.checkTimeWindow(
                            dbAlias, atlasDiscoveryMode=True) == False:
                        continue

                    # Find out if the dbAlias is blacklisted
                    if self.isConnectionBlacklisted(dbAlias) == True:
                        continue

                    if dbAlias in self.connectionsSentToCrawlers:
                        # log.warning("This connection is already being processed. Skipping....")
                        continue

                    altasOperationFailed = False
                    printBlackListWarning = True
                    self.common_config.mysql_conn.commit()
                    try:
                        self.common_config.lookupConnectionAlias(dbAlias)
                    except invalidConfiguration as err:
                        if self.common_config.atlasJdbcSourceSupport == True:
                            log.error(
                                "Connection '%s' have invalid configuration. Failed with '%s'"
                                % (dbAlias, err))
                            altasOperationFailed = True

                    if self.common_config.atlasJdbcSourceSupport == False:
                        # This source type does not support Atlas discovery
                        log.debug(
                            "Connection '%s' does not support Atlas discovery. Skipping..."
                            % (dbAlias))
                        altasOperationFailed = True
                        printBlackListWarning = False

                    # Start the Jpype JVM as that needs to be running before the crawlers starts to use it
                    if jpype.isJVMStarted() == False:
                        log.debug("Starting jpype JVM")
                        self.common_config.connectToJDBC(
                            allJarFiles=True,
                            exitIfFailure=False,
                            logger=logger,
                            printError=False)
                        self.common_config.disconnectFromJDBC()

                    # if altasOperationFailed == False and self.common_config.connectToJDBC(allJarFiles=True, exitIfFailure=False, logger=logger) == True:
                    if altasOperationFailed == False:
                        # self.common_config.atlasEnabled = True
                        self.connectionsSentToCrawlers.append(dbAlias)
                        log.debug("Sending alias '%s' to queue" % (dbAlias))
                        atlasCrawlerRequest = {}
                        atlasCrawlerRequest["dbAlias"] = row['dbalias']
                        atlasCrawlerRequest["contactInfo"] = row[
                            'contact_info']
                        atlasCrawlerRequest["description"] = row[
                            'description']
                        atlasCrawlerRequest["owner"] = row['owner']
                        atlasCrawlerRequest["atlasIncludeFilter"] = row[
                            'atlas_include_filter']
                        atlasCrawlerRequest["atlasExcludeFilter"] = row[
                            'atlas_exclude_filter']
                        atlasCrawlerRequest[
                            "jdbc_hostname"] = self.common_config.jdbc_hostname
                        atlasCrawlerRequest[
                            "jdbc_port"] = self.common_config.jdbc_port
                        atlasCrawlerRequest[
                            "jdbc_servertype"] = self.common_config.jdbc_servertype
                        atlasCrawlerRequest[
                            "jdbc_database"] = self.common_config.jdbc_database
                        atlasCrawlerRequest[
                            "jdbc_oracle_sid"] = self.common_config.jdbc_oracle_sid
                        atlasCrawlerRequest[
                            "jdbc_oracle_servicename"] = self.common_config.jdbc_oracle_servicename
                        atlasCrawlerRequest[
                            "jdbc_username"] = self.common_config.jdbc_username
                        atlasCrawlerRequest[
                            "jdbc_password"] = self.common_config.jdbc_password
                        atlasCrawlerRequest[
                            "jdbc_driver"] = self.common_config.jdbc_driver
                        atlasCrawlerRequest[
                            "jdbc_url"] = self.common_config.jdbc_url
                        atlasCrawlerRequest[
                            "jdbc_classpath_for_python"] = self.common_config.jdbc_classpath_for_python
                        atlasCrawlerRequest[
                            "jdbc_environment"] = self.common_config.jdbc_environment
                        atlasCrawlerRequest["hdfs_address"] = None
                        atlasCrawlerRequest["cluster_name"] = None

                        self.atlasCrawlerProcessQueue.put(
                            atlasCrawlerRequest)

                    else:
                        # altasOperationFailed = True
                        # if altasOperationFailed == True:
                        self.blacklistConnection(dbAlias,
                                                 printBlackListWarning)

        # ********************************
        # Read response from atlasCrawler
        # ********************************

        try:
            atlasCrawlerResult = self.atlasCrawlerResultQueue.get(
                block=False, timeout=1)
        except Empty:
            atlasCrawlerResult = None

        if atlasCrawlerResult is not None:
            dbAlias = atlasCrawlerResult.get('dbAlias')
            result = atlasCrawlerResult.get('result')
            blacklist = atlasCrawlerResult.get('blacklist')
            log.debug("atlasCrawlerResultQueue: %s" % (atlasCrawlerResult))
            self.connectionsSentToCrawlers.remove(dbAlias)

            if result == True:
                # Successful crawl: persist the discovery timestamp.
                updateDict = {}
                updateDict["atlas_last_discovery"] = str(
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f'))

                try:
                    session = self.getDBImportSession()
                    (session.query(configSchema.jdbcConnections).filter(
                        configSchema.jdbcConnections.dbalias ==
                        dbAlias).update(updateDict))
                    session.commit()
                    session.close()
                except SQLAlchemyError as e:
                    log.error(str(e.__dict__['orig']))
                    session.rollback()
                    self.disconnectDBImportDB()
                else:
                    self.removeBlacklist(dbAlias)
            else:
                if blacklist == True:
                    log.error(
                        "Connection '%s' failed during crawling of database schema"
                        % (dbAlias))
                    self.blacklistConnection(dbAlias)
                else:
                    log.warning(
                        "A Warning was detected when crawling connection '%s'. It will not be marked as completed and will retry the operation"
                        % (dbAlias))

        time.sleep(1)

    self.disconnectDBImportDB()
    if atlasEnabled == True:
        log.info("atlasDiscovery stopped")
def test_polymorphic_any_five(self):
    """any() over an aliased Company's engineers matches company c2."""
    session = create_session()
    company_alias = aliased(Company)
    cobol_criterion = company_alias.employees.of_type(Engineer).any(
        Engineer.primary_language == 'cobol')
    result = session.query(company_alias).filter(cobol_criterion).one()
    eq_(result, c2)
def execute(self):
    """Compute the full diff between the two program versions of a survey.

    Builds qnode and measure diff entries (serialized via ToSon), sorts
    them by path, and prepends program/survey context entries.

    Returns:
        list(dict): diff items, each with 'type', 'tags' and 'pair' keys.
    """
    qnode_pairs = self.get_qnodes()
    measure_pairs = self.get_measures()

    to_son = ToSon(
        r'/id$',
        r'/survey_id$',
        r'/parent_id$',
        r'/title$',
        r'</description$',
        r'/response_type_id$',
        r'/seq$',
        # Descend
        r'/[0-9]+$',
        r'^/[0-9]+/[^/]+$',
    )
    if self.include_scores:
        to_son.add(r'/weight$', )

    qnode_pairs = self.realise_soft_deletion(qnode_pairs)
    qnode_pairs = self.remove_soft_deletion_dups(qnode_pairs)
    qnode_diff = [{
        'type': 'qnode',
        'tags': [],
        'pair': pair,
    } for pair in to_son(qnode_pairs)]
    start = perf()
    self.add_qnode_metadata(qnode_pairs, qnode_diff)
    self.add_metadata(qnode_pairs, qnode_diff)
    duration = perf() - start
    self.timing.append("Qnode metadata took %gs" % duration)
    self.remove_unchanged_fields(qnode_diff)

    measure_pairs = self.realise_soft_deletion(measure_pairs)
    measure_pairs = self.remove_soft_deletion_dups(measure_pairs)
    measure_diff = [{
        'type': 'measure',
        'tags': [],
        'pair': pair,
    } for pair in to_son(measure_pairs)]
    start = perf()
    self.add_measure_metadata(measure_pairs, measure_diff)
    self.add_metadata(measure_pairs, measure_diff)
    duration = perf() - start
    self.timing.append("Measure metadata took %gs" % duration)
    self.remove_unchanged_fields(measure_diff)

    diff = qnode_diff + measure_diff

    def path_key(diff_item):
        # Sort by the new (b-side) path when available, then by the old
        # (a-side) path for deletions, with empty pairs last.
        a, b = diff_item['pair']
        if a and b:
            return 0, [int(c) for c in b['path'].split('.') if c != '']
        elif b:
            return 0, [int(c) for c in b['path'].split('.') if c != '']
        elif a:
            return 1, [int(c) for c in a['path'].split('.') if c != '']
        else:
            # BUG FIX: previously returned the bare int 2; on Python 3
            # comparing an int key against the tuple keys above raises
            # TypeError during sort. Return a tuple of the same shape.
            return 2, []

    start = perf()
    diff.sort(key=path_key)
    duration = perf() - start
    self.timing.append("Sorting took %gs" % duration)

    HA = model.Survey
    HB = aliased(model.Survey, name='survey_b')
    survey_a, survey_b = (self.session.query(HA, HB).join(
        HB, (HA.id == HB.id)).filter(HA.program_id == self.program_id_a,
                                     HB.program_id == self.program_id_b,
                                     HA.id == self.survey_id).first())
    to_son = ToSon(
        r'/id$',
        r'/title$',
        r'</description$',
    )
    top_level_diff = [{
        'type': 'program',
        'tags': ['context'],
        'pair': [to_son(survey_a.program), to_son(survey_b.program)]
    }, {
        'type': 'survey',
        'tags': ['context'],
        'pair': [to_son(survey_a), to_son(survey_b)]
    }]
    self.remove_unchanged_fields(top_level_diff)

    return top_level_diff + diff
def apply_tag_filters(model, query, filters):
    """Narrow *query* using the tag filters present in *filters*.

    Four filter keys are recognized, each carrying one or more tag strings:

    `tags`         -- AND expression:        T1 AND T2
    `tags-any`     -- OR expression:         T1 OR T2
    `not-tags`     -- negated AND:           NOT (T1 AND T2)
    `not-tags-any` -- negated OR:            NOT (T1 OR T2)

    Note: tag values can be specified comma separated string.
    for example, 'GET /v2.0/networks?tags-any=red,blue' is equivalent
    to 'GET /v2.0/networks?tags-any=red&tags-any=blue' it means
    'red' or 'blue'.

    Consumed keys are popped from *filters*; the narrowed query is returned.
    """
    if 'tags' in filters:
        required = _get_tag_list(filters.pop('tags'))
        head = required.pop(0)
        # One join per required tag: every tag must be attached.
        query = query.join(
            Tag, model.standard_attr_id == Tag.standard_attr_id
        ).filter(Tag.tag == head)
        for extra in required:
            alias = aliased(Tag)
            query = query.join(
                alias, model.standard_attr_id == alias.standard_attr_id
            ).filter(alias.tag == extra)

    if 'tags-any' in filters:
        candidates = _get_tag_list(filters.pop('tags-any'))
        # A single IN clause covers "any of these tags".
        query = query.join(
            Tag, model.standard_attr_id == Tag.standard_attr_id
        ).filter(Tag.tag.in_(candidates))

    if 'not-tags' in filters:
        excluded = _get_tag_list(filters.pop('not-tags'))
        head = excluded.pop(0)
        # Subquery of objects carrying ALL excluded tags, then negate it.
        subq = query.session.query(Tag.standard_attr_id).filter(
            Tag.tag == head)
        for extra in excluded:
            alias = aliased(Tag)
            subq = subq.join(
                alias, Tag.standard_attr_id == alias.standard_attr_id
            ).filter(alias.tag == extra)
        query = query.filter(~model.standard_attr_id.in_(subq))

    if 'not-tags-any' in filters:
        excluded = _get_tag_list(filters.pop('not-tags-any'))
        subq = query.session.query(Tag.standard_attr_id).filter(
            Tag.tag.in_(excluded))
        query = query.filter(~model.standard_attr_id.in_(subq))

    return query
def test_double_w_ac(self):
    """Two relationships to the same Order table via aliased() mappers,
    distinguished by custom primaryjoins on the isopen flag."""
    (
        users,
        orders,
        User,
        Address,
        Order,
        addresses,
        Item,
        items,
        order_items,
    ) = (
        self.tables.users,
        self.tables.orders,
        self.classes.User,
        self.classes.Address,
        self.classes.Order,
        self.tables.addresses,
        self.classes.Item,
        self.tables.items,
        self.tables.order_items,
    )

    mapper(Address, addresses)
    mapper(
        Order,
        orders,
        properties={
            "items": relationship(
                Item,
                secondary=order_items,
                lazy="select",
                order_by=items.c.id,
            )
        },
    )
    mapper(Item, items)

    # Two aliased mappers over the same orders table; each relationship
    # below selects a disjoint subset via its primaryjoin.
    open_mapper = aliased(Order, orders)
    closed_mapper = aliased(Order, orders)
    mapper(
        User,
        users,
        properties=dict(
            addresses=relationship(Address, lazy=True),
            open_orders=relationship(
                open_mapper,
                primaryjoin=sa.and_(
                    open_mapper.isopen == 1,
                    users.c.id == open_mapper.user_id,
                ),
                lazy="select",
                # The two relationships intentionally share FK columns.
                overlaps="closed_orders",
            ),
            closed_orders=relationship(
                closed_mapper,
                primaryjoin=sa.and_(
                    closed_mapper.isopen == 0,
                    users.c.id == closed_mapper.user_id,
                ),
                lazy="select",
                overlaps="open_orders",
            ),
        ),
    )

    self._run_double_test()
def __search(self, tool_id, tool_version, user, input_data, job_state=None,
             param_dump=None, wildcard_param_dump=None):
    """Search for a previously-run job equivalent to the requested one.

    Matches on tool id/version, user, job state, input datasets (including
    copies of the originals) and job parameters. Returns the matching Job,
    an empty list for an unknown input source type, or None if no
    equivalent job exists.
    """
    search_timer = ExecutionTimer()

    def replace_dataset_ids(path, key, value):
        """Exchanges dataset_ids (HDA, LDA, HDCA, not Dataset) in param_dump
        with dataset ids used in job."""
        if key == 'id':
            current_case = param_dump
            for p in path:
                current_case = current_case[p]
            src = current_case['src']
            value = job_input_ids[src][value]
            return key, value
        return key, value

    job_conditions = [
        and_(
            model.Job.tool_id == tool_id,
            model.Job.user == user,
            model.Job.copied_from_job_id.is_(
                None)  # Always pick original job
        )
    ]

    if tool_version:
        job_conditions.append(model.Job.tool_version == str(tool_version))

    if job_state is None:
        # Default: any non-terminal-failure state plus OK.
        job_conditions.append(
            model.Job.state.in_([
                model.Job.states.NEW, model.Job.states.QUEUED,
                model.Job.states.WAITING, model.Job.states.RUNNING,
                model.Job.states.OK
            ]))
    else:
        if isinstance(job_state, str):
            job_conditions.append(model.Job.state == job_state)
        elif isinstance(job_state, list):
            o = []
            for s in job_state:
                o.append(model.Job.state == s)
            job_conditions.append(or_(*o))

    for k, v in wildcard_param_dump.items():
        wildcard_value = None
        if v == {'__class__': 'RuntimeValue'}:
            # TODO: verify this is always None. e.g. run with runtime input input
            v = None
        elif k.endswith('|__identifier__'):
            # We've taken care of this while constructing the conditions based on ``input_data`` above
            continue
        elif k == 'chromInfo' and '?.len' in v:
            continue
            # NOTE(review): the following assignment is unreachable after
            # `continue` -- dead code preserved as found.
            wildcard_value = '"%?.len"'
        if not wildcard_value:
            value_dump = json.dumps(v, sort_keys=True)
            wildcard_value = value_dump.replace('"id": "__id_wildcard__"',
                                                '"id": %')
        a = aliased(model.JobParameter)
        if value_dump == wildcard_value:
            job_conditions.append(
                and_(
                    model.Job.id == a.job_id,
                    a.name == k,
                    a.value == value_dump,
                ))
        else:
            # Wildcard match on the remapped id placeholder.
            job_conditions.append(
                and_(model.Job.id == a.job_id, a.name == k,
                     a.value.like(wildcard_value)))

    job_conditions.append(
        and_(
            model.Job.any_output_dataset_collection_instances_deleted ==
            false(),
            model.Job.any_output_dataset_deleted == false()))

    subq = self.sa_session.query(
        model.Job.id).filter(*job_conditions).subquery()
    data_conditions = []

    # We now build the query filters that relate to the input datasets
    # that this job uses. We keep track of the requested dataset id in `requested_ids`,
    # the type (hda, hdca or lda) in `data_types`
    # and the ids that have been used in the job that has already been run in `used_ids`.
    requested_ids = []
    data_types = []
    used_ids = []
    for k, input_list in input_data.items():
        # k will be matched against the JobParameter.name column. This can be
        # prefixed depending on whether the input is in a repeat, or not
        # (section and conditional)
        k = {k, k.split('|')[-1]}
        for type_values in input_list:
            t = type_values['src']
            v = type_values['id']
            requested_ids.append(v)
            data_types.append(t)
            identifier = type_values['identifier']
            if t == 'hda':
                a = aliased(model.JobToInputDatasetAssociation)
                b = aliased(model.HistoryDatasetAssociation)
                c = aliased(model.HistoryDatasetAssociation)
                d = aliased(model.JobParameter)
                e = aliased(model.HistoryDatasetAssociationHistory)
                stmt = select([
                    model.HistoryDatasetAssociation.id
                ]).where(model.HistoryDatasetAssociation.id ==
                         e.history_dataset_association_id)
                name_condition = []
                if identifier:
                    data_conditions.append(
                        and_(
                            model.Job.id == d.job_id,
                            d.name.in_(
                                {"%s|__identifier__" % _ for _ in k}),
                            d.value == json.dumps(identifier)))
                else:
                    stmt = stmt.where(e.name == c.name)
                    name_condition.append(b.name == c.name)
                stmt = stmt.where(e.extension == c.extension, ).where(
                    a.dataset_version == e.version, ).where(
                        e._metadata == c._metadata, )
                data_conditions.append(
                    and_(
                        a.name.in_(k),
                        a.dataset_id == b.id,  # b is the HDA used for the job
                        c.dataset_id == b.dataset_id,
                        c.id == v,  # c is the requested job input HDA
                        # We need to make sure that the job we are looking for has been run with identical inputs.
                        # Here we deal with 3 requirements:
                        #   - the jobs' input dataset (=b) version is 0, meaning the job's input dataset is not yet ready
                        #   - b's update_time is older than the job create time, meaning no changes occurred
                        #   - the job has a dataset_version recorded, and that versions' metadata matches c's metadata.
                        or_(
                            and_(
                                or_(a.dataset_version.in_([0, b.version]),
                                    b.update_time < model.Job.create_time),
                                b.extension == c.extension,
                                b.metadata == c.metadata,
                                *name_condition,
                            ),
                            b.id.in_(stmt)),
                        or_(b.deleted == false(), c.deleted == false())))

                used_ids.append(a.dataset_id)
            elif t == 'ldda':
                a = aliased(model.JobToInputLibraryDatasetAssociation)
                data_conditions.append(
                    and_(model.Job.id == a.job_id, a.name.in_(k),
                         a.ldda_id == v))
                used_ids.append(a.ldda_id)
            elif t == 'hdca':
                a = aliased(model.JobToInputDatasetCollectionAssociation)
                b = aliased(model.HistoryDatasetCollectionAssociation)
                c = aliased(model.HistoryDatasetCollectionAssociation)
                data_conditions.append(
                    and_(
                        model.Job.id == a.job_id,
                        a.name.in_(k),
                        b.id == a.dataset_collection_id,
                        c.id == v,
                        b.name == c.name,
                        # Either the collection used directly, or a copy
                        # relationship in either direction.
                        or_(
                            and_(b.deleted == false(), b.id == v),
                            and_(
                                or_(
                                    c.
                                    copied_from_history_dataset_collection_association_id
                                    == b.id,
                                    b.
                                    copied_from_history_dataset_collection_association_id
                                    == c.id),
                                c.deleted == false(),
                            ))))
                used_ids.append(a.dataset_collection_id)
            elif t == 'dce':
                a = aliased(
                    model.JobToInputDatasetCollectionElementAssociation)
                b = aliased(model.DatasetCollectionElement)
                c = aliased(model.DatasetCollectionElement)
                data_conditions.append(
                    and_(
                        model.Job.id == a.job_id,
                        a.name.in_(k),
                        a.dataset_collection_element_id == b.id,
                        b.element_identifier == c.element_identifier,
                        c.child_collection_id == b.child_collection_id,
                        c.id == v,
                    ))
                used_ids.append(a.dataset_collection_element_id)
            else:
                # Unknown input source type: no equivalent job possible.
                return []

    query = self.sa_session.query(model.Job.id, *used_ids).join(
        subq, model.Job.id == subq.c.id).filter(*data_conditions).group_by(
            model.Job.id, *used_ids).order_by(model.Job.id.desc())
    for job in query:
        # We found a job that is equal in terms of tool_id, user, state and input datasets,
        # but to be able to verify that the parameters match we need to modify all instances of
        # dataset_ids (HDA, LDDA, HDCA) in the incoming param_dump to point to those used by the
        # possibly equivalent job, which may have been run on copies of the original input data.
        job_input_ids = {}
        if len(job) > 1:
            # We do have datasets to check
            job_id, current_jobs_data_ids = job[0], job[1:]
            job_parameter_conditions = [model.Job.id == job_id]
            for src, requested_id, used_id in zip(data_types, requested_ids,
                                                  current_jobs_data_ids):
                if src not in job_input_ids:
                    job_input_ids[src] = {requested_id: used_id}
                else:
                    job_input_ids[src][requested_id] = used_id
            new_param_dump = remap(param_dump, visit=replace_dataset_ids)
            # new_param_dump has its dataset ids remapped to those used by the job.
            # We now ask if the remapped job parameters match the current job.
            for k, v in new_param_dump.items():
                if v == {'__class__': 'RuntimeValue'}:
                    # TODO: verify this is always None. e.g. run with runtime input input
                    v = None
                elif k.endswith('|__identifier__'):
                    # We've taken care of this while constructing the conditions based on ``input_data`` above
                    continue
                elif k == 'chromInfo' and '?.len' in v:
                    continue
                    # NOTE(review): unreachable after `continue` -- dead
                    # code preserved as found.
                    wildcard_value = '"%?.len"'
                # NOTE(review): unlike the earlier loop, `wildcard_value`
                # is not reset to None per iteration here -- confirm intent.
                if not wildcard_value:
                    wildcard_value = json.dumps(v, sort_keys=True).replace(
                        '"id": "__id_wildcard__"', '"id": %')
                a = aliased(model.JobParameter)
                job_parameter_conditions.append(
                    and_(model.Job.id == a.job_id, a.name == k,
                         a.value == json.dumps(v, sort_keys=True)))
        else:
            job_parameter_conditions = [model.Job.id == job]
        query = self.sa_session.query(
            model.Job).filter(*job_parameter_conditions)
        job = query.first()
        if job is None:
            continue
        n_parameters = 0
        # Verify that equivalent jobs had the same number of job parameters
        # We skip chromInfo, dbkey, __workflow_invocation_uuid__ and identifier
        # parameters as these are not passed along when expanding tool parameters
        # and they can differ without affecting the resulting dataset.
        for parameter in job.parameters:
            if parameter.name.startswith("__"):
                continue
            if parameter.name in {
                    'chromInfo', 'dbkey'
            } or parameter.name.endswith('|__identifier__'):
                continue
            n_parameters += 1
        if not n_parameters == sum(
                1 for k in param_dump
                if not k.startswith('__')
                and not k.endswith('|__identifier__')
                and k not in {'chromInfo', 'dbkey'}):
            continue
        log.info("Found equivalent job %s", search_timer)
        return job
    log.info("No equivalent jobs found %s", search_timer)
    return None
def test_from_alias(self):
    """An aliased(Person) can be queried and filtered by a list of names."""
    session = create_session()
    person_alias = aliased(Person)
    result = (
        session.query(person_alias)
        .filter(person_alias.name.in_(['dilbert', 'wally']))
        .all()
    )
    eq_(result, [e1, e2])
def get_stratified_daily_progress(query, event, location_type):
    """Build per-location, per-day participant-update counts for an event.

    For each location of ``location_type`` that is an ancestor of the
    submissions' locations, counts submissions by day between the event's
    start and end dates (boundary days absorb everything before/after).

    :param query: a Submission query (used both for sampling and joining)
    :param event: object with tz-aware ``start`` and ``end`` datetimes
    :param location_type: the ancestor LocationType to stratify by
    :returns: list of ``{'name': ..., 'data': {date: count, 'total': n}}``
    """
    response = []
    ancestor_location = aliased(Location)
    location_closure = aliased(LocationPath)
    # Sample one submission to discover the location type of the data,
    # then find the closure depth between it and the requested ancestor type.
    sample_sub = query.first()
    sub_location_type = sample_sub.location.location_type
    depth_info = LocationTypePath.query.filter_by(
        ancestor_id=location_type.id,
        descendant_id=sub_location_type.id).first()
    if depth_info:
        # Join each submission to its ancestor at exactly the right depth
        # of the location closure table, grouping by ancestor + timestamp.
        _query = query.join(
            location_closure,
            location_closure.descendant_id == Submission.location_id).join(
                ancestor_location,
                ancestor_location.id == location_closure.ancestor_id
            ).filter(
                location_closure.depth == depth_info.depth
            ).with_entities(
                ancestor_location.name, ancestor_location.code,
                Submission.participant_updated
            ).options(
                Load(ancestor_location).load_only(
                    'id', 'code', 'name_translations')
            ).group_by(ancestor_location.id, Submission.participant_updated)
        # Load straight into pandas, indexed by the (parsed, localized)
        # participant_updated timestamp.
        df = pd.read_sql(
            _query.statement, _query.session.bind,
            index_col=['participant_updated'],
            parse_dates=['participant_updated']).tz_localize(TIMEZONE)
        df['count'] = 1  # each row is one submission
        tz = timezone(TIMEZONE)
        # Midnight (local) of the event's start and end days.
        start = tz.localize(datetime.combine(
            event.start.astimezone(tz), datetime.min.time()))
        end = tz.localize(
            datetime.combine(event.end.astimezone(tz), datetime.min.time()))
        locations = Location.query.filter(
            Location.location_set == location_type.location_set,
            Location.location_type == location_type)
        # Ensure every location of the type appears, even with zero rows.
        # NOTE(review): the column is accessed as 'getter' here but the
        # query selected ``ancestor_location.name`` — presumably the
        # hybrid/column is labelled 'getter' by the model; verify.
        # NOTE(review): DataFrame.append was removed in pandas 2.0 —
        # this code assumes pandas < 2; confirm the pinned version.
        for location in locations:
            if location.name not in df.loc[df.index.notnull()]['getter'].unique():  # noqa
                df = df.append(pd.DataFrame(
                    {'getter': location.name, 'count': 0, 'code': location.code},  # noqa
                    index=[start]))
        # Daily totals per (location name, code); then drop the code level
        # after using it to fix the sort order.
        df2 = df.loc[df.index.notnull()].groupby(
            ['getter', 'code']).resample('D').sum()  # noqa
        df2 = df2.sort_index(level='code')
        df2.index = df2.index.droplevel('code')
        for location in df2.index.get_level_values(0).unique():  # noqa
            # Pad with zero rows at start/end so truncate() always has
            # both boundary days present.
            df_resampled = df2.loc[location].append(
                pd.DataFrame({'count': 0}, index=[start])).append(
                    pd.DataFrame({'count': 0}, index=[end])).resample(
                        'D').sum()
            progress = df_resampled.truncate(before=start, after=end)
            # Fold anything before the start day (resp. after the end day)
            # into the boundary days' counts.
            progress.loc[progress.index == start.strftime(
                '%Y-%m-%d'), 'count'] = int(
                    df_resampled[df_resampled.index <= start].sum())
            progress.loc[progress.index == end.strftime(
                '%Y-%m-%d'), 'count'] = int(
                    df_resampled[df_resampled.index >= end].sum())
            dp = {
                idx.date(): int(progress.loc[idx]['count'])
                for idx in progress.index
            }
            dp.update({'total': progress['count'].sum()})
            response.append({'name': location, 'data': dp})
    return response
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Demonstrate common SQLAlchemy filter operators on the Employees table."""
from sqlalchemy.orm import sessionmaker, aliased
from d12_04 import session, Employees, Departments, Salary
from sqlalchemy import and_, or_


new_emp = aliased(Employees)


def _show(criterion):
    # Print name/phone of every employee matching the criterion,
    # then a separator line.
    for row in session.query(new_emp.name, new_emp.phone).filter(criterion):
        print(row.name, row.phone)
    print('#' * 50)


# equality
_show(new_emp.name == 'john')
# inequality
_show(new_emp.name != 'john')
# fuzzy match (LIKE)
_show(new_emp.name.like('j%'))
# membership (IN)
_show(new_emp.name.in_(['bob', 'john']))
# negated membership (NOT IN)
_show(~new_emp.name.in_(['bob', 'john']))
# NULL check (IS NULL)
_show(new_emp.name.is_(None))
def search_datasets(dataset_id=None,
                    dataset_name=None,
                    collection_name=None,
                    data_type=None,
                    unit_id=None,
                    scenario_id=None,
                    metadata_key=None,
                    metadata_val=None,
                    attr_id=None,
                    type_id=None,
                    unconnected=None,
                    inc_metadata='N',
                    inc_val='N',
                    page_start=0,
                    page_size=2000,
                    **kwargs):
    """
    Get multiple datasets, based on several filters.

    If all filters are set to None, all datasets in the DB (that the user
    is allowed to see) will be returned.

    :param dataset_id: IDs are unique, so when given all other filters are
        ignored
    :param dataset_name: case-insensitive substring match on the name
    :param collection_name: case-insensitive substring match on a collection
        containing the dataset
    :param data_type: case-insensitive exact match on the dataset type
    :param unit_id: exact match on the unit (None means "don't filter")
    :param scenario_id: only datasets used in this scenario
    :param metadata_key / metadata_val: case-insensitive substring matches
        on metadata keys / values
    :param attr_id: only datasets attached to this attribute
    :param type_id: only datasets attached to this template type
    :param unconnected: 'Y' -> only datasets in no scenario; 'N' -> only
        datasets in at least one scenario; None -> no connectivity filter
    :param inc_metadata: 'Y' to fetch metadata (one extra query per dataset)
    :param inc_val: 'Y' to include the dataset value (stringified)
    :param page_start / page_size: slice applied to the result list
    :returns: a list of namedtuples, one per matching dataset
    """
    log.info("Searching datasets: \ndatset_id: %s,\n"
             "datset_name: %s,\n"
             "collection_name: %s,\n"
             "data_type: %s,\n"
             "unit_id: %s,\n"
             "scenario_id: %s,\n"
             "metadata_key: %s,\n"
             "metadata_val: %s,\n"
             "attr_id: %s,\n"
             "type_id: %s,\n"
             "unconnected: %s,\n"
             "inc_metadata: %s,\n"
             "inc_val: %s,\n"
             "page_start: %s,\n"
             "page_size: %s" % (dataset_id, dataset_name, collection_name,
                                data_type, unit_id, scenario_id, metadata_key,
                                metadata_val, attr_id, type_id, unconnected,
                                inc_metadata, inc_val, page_start, page_size))

    if page_size is None:
        page_size = config.get('SEARCH', 'page_size', 2000)

    user_id = int(kwargs.get('user_id'))

    dataset_qry = db.DBSession.query(Dataset.id,
                                     Dataset.type,
                                     Dataset.unit_id,
                                     Dataset.name,
                                     Dataset.hidden,
                                     Dataset.cr_date,
                                     Dataset.created_by,
                                     DatasetOwner.user_id,
                                     null().label('metadata'),
                                     Dataset.value)

    #Dataset ID is unique, so there's no point using the other filters.
    #Only use other filters if the datset ID is not specified.
    if dataset_id is not None:
        dataset_qry = dataset_qry.filter(Dataset.id == dataset_id)
    else:
        if dataset_name is not None:
            dataset_qry = dataset_qry.filter(
                func.lower(Dataset.name).like("%%%s%%" % dataset_name.lower()))
        if collection_name is not None:
            dc = aliased(DatasetCollection)
            dci = aliased(DatasetCollectionItem)
            dataset_qry = dataset_qry.join(
                dc, func.lower(dc.name).like(
                    "%%%s%%" % collection_name.lower())).join(
                        dci, and_(dci.collection_id == dc.id,
                                  dci.dataset_id == Dataset.id))
        if data_type is not None:
            dataset_qry = dataset_qry.filter(
                func.lower(Dataset.type) == data_type.lower())
        #null is a valid unit, so we need a way for the searcher
        #to specify that they want to search for datasets with a null unit
        #rather than ignoring the unit. We use 'null' to do this.
        if unit_id is not None:
            dataset_qry = dataset_qry.filter(Dataset.unit_id == unit_id)
        if scenario_id is not None:
            dataset_qry = dataset_qry.join(
                ResourceScenario,
                and_(ResourceScenario.dataset_id == Dataset.id,
                     ResourceScenario.scenario_id == scenario_id))
        if attr_id is not None:
            dataset_qry = dataset_qry.join(
                ResourceScenario,
                ResourceScenario.dataset_id == Dataset.id).join(
                    ResourceAttr,
                    and_(ResourceAttr.id == ResourceScenario.resource_attr_id,
                         ResourceAttr.attr_id == attr_id))
        if type_id is not None:
            dataset_qry = dataset_qry.join(
                ResourceScenario,
                ResourceScenario.dataset_id == Dataset.id).join(
                    ResourceAttr,
                    ResourceAttr.id == ResourceScenario.resource_attr_id).join(
                        TypeAttr,
                        and_(TypeAttr.attr_id == ResourceAttr.attr_id,
                             TypeAttr.type_id == type_id))
        if unconnected == 'Y':
            # Anti-join: keep only datasets with no resource scenario.
            stmt = db.DBSession.query(
                distinct(ResourceScenario.dataset_id).label('dataset_id'),
                literal_column("0").label('col')).subquery()
            dataset_qry = dataset_qry.outerjoin(
                stmt, stmt.c.dataset_id == Dataset.id)
            dataset_qry = dataset_qry.filter(stmt.c.col == None)
        elif unconnected == 'N':
            #The dataset has to be connected to something
            stmt = db.DBSession.query(
                distinct(ResourceScenario.dataset_id).label('dataset_id'),
                literal_column("0").label('col')).subquery()
            dataset_qry = dataset_qry.join(stmt,
                                           stmt.c.dataset_id == Dataset.id)
        if metadata_key is not None and metadata_val is not None:
            dataset_qry = dataset_qry.join(
                Metadata,
                and_(
                    Metadata.dataset_id == Dataset.id,
                    func.lower(Metadata.key).like(
                        "%%%s%%" % metadata_key.lower()),
                    func.lower(Metadata.value).like(
                        "%%%s%%" % metadata_val.lower())))
        elif metadata_key is not None and metadata_val is None:
            dataset_qry = dataset_qry.join(
                Metadata,
                and_(
                    Metadata.dataset_id == Dataset.id,
                    func.lower(Metadata.key).like(
                        "%%%s%%" % metadata_key.lower())))
        elif metadata_key is None and metadata_val is not None:
            dataset_qry = dataset_qry.join(
                Metadata,
                and_(
                    Metadata.dataset_id == Dataset.id,
                    func.lower(Metadata.value).like(
                        "%%%s%%" % metadata_val.lower())))

    #All datasets must be joined on dataset owner so only datasets that the
    #user can see are retrieved.
    dataset_qry = dataset_qry.outerjoin(
        DatasetOwner,
        and_(DatasetOwner.dataset_id == Dataset.id,
             DatasetOwner.user_id == user_id))
    # BUG FIX: this previously used the Python expression
    # ``DatasetOwner.user_id is not None``, which evaluates to the constant
    # True (it is an identity test on the Column object, not SQL), so ALL
    # hidden datasets were returned regardless of ownership. The comparison
    # must be done at the SQL level so only hidden datasets owned by the
    # requesting user pass the filter.
    dataset_qry = dataset_qry.filter(
        or_(Dataset.hidden == 'N',
            and_(DatasetOwner.user_id != None,  # noqa: E711 -- SQL IS NOT NULL
                 Dataset.hidden == 'Y')))

    log.info(str(dataset_qry))
    # NOTE: paging happens in Python after fetching all rows; fine for small
    # tables, but a LIMIT/OFFSET on the query would scale better.
    datasets = dataset_qry.all()

    log.info("Retrieved %s datasets", len(datasets))

    #page the datasets:
    if page_start + page_size > len(datasets):
        page_end = None
    else:
        page_end = page_start + page_size
    datasets = datasets[page_start:page_end]
    log.info("Datasets paged from result %s to %s", page_start, page_end)

    datasets_to_return = []
    for dataset_row in datasets:
        dataset_dict = dataset_row._asdict()
        if inc_val == 'N':
            dataset_dict['value'] = None
        else:
            #convert the value row into a string as it is returned as a binary
            if dataset_row.value is not None:
                dataset_dict['value'] = str(dataset_row.value)
        if inc_metadata == 'Y':
            # NOTE(review): the row attribute is accessed as ``dataset_id``
            # although the query selects ``Dataset.id`` — presumably the
            # mapped attribute is labelled dataset_id; verify against the model.
            metadata = db.DBSession.query(Metadata).filter(
                Metadata.dataset_id == dataset_row.dataset_id).all()
            dataset_dict['metadata'] = metadata
        else:
            dataset_dict['metadata'] = []
        dataset = namedtuple('Dataset', dataset_dict.keys())(**dataset_dict)
        datasets_to_return.append(dataset)

    return datasets_to_return
from enum import Enum
from typing import Dict, Union

from sqlalchemy.orm import aliased
from sqlalchemy.orm.util import AliasedClass

from nmdc_server import models

# Module-level ORM aliases: each ENVO facet (broad scale / local scale /
# medium) needs its own alias of the ancestor and term tables so all three
# can be joined in a single query without colliding.
EnvBroadScaleAncestor: AliasedClass = aliased(models.EnvoAncestor)
EnvBroadScaleTerm: AliasedClass = aliased(models.EnvoTerm)
EnvLocalScaleAncestor: AliasedClass = aliased(models.EnvoAncestor)
EnvLocalScaleTerm: AliasedClass = aliased(models.EnvoTerm)
EnvMediumAncestor: AliasedClass = aliased(models.EnvoAncestor)
EnvMediumTerm: AliasedClass = aliased(models.EnvoTerm)
# Separate alias for gene functions coming from metaproteomics results.
MetaPGeneFunction: AliasedClass = aliased(models.GeneFunction)


class Table(Enum):
    """Names of the queryable tables/entities; values are their API names."""

    biosample = "biosample"
    study = "study"
    omics_processing = "omics_processing"
    reads_qc = "reads_qc"
    metagenome_assembly = "metagenome_assembly"
    metagenome_annotation = "metagenome_annotation"
    metaproteomic_analysis = "metaproteomic_analysis"
    mags_analysis = "mags_analysis"
    nom_analysis = "nom_analysis"
    read_based_analysis = "read_based_analysis"
    metabolomics_analysis = "metabolomics_analysis"
    gene_function = "gene_function"
    metap_gene_function = "metap_gene_function"
def get_data_in_project(self, project_id, visibility_filter=None, filter_value=None):
    """
    Get all the DataTypes for a given project, including Linked Entities
    and DataType Groups.

    :param project_id: ID of the project to retrieve datatypes for
    :param visibility_filter: when not None, will filter by DataType fields
    :param filter_value: when not None, will filter with ilike multiple
        DataType string attributes
    :returns: list of DataType instances (empty list on error)
    """
    resulted_data = []
    try:
        ## First Query DT, DT_gr, Lk_DT and Lk_DT_gr
        query = self.session.query(model.DataType).join(
            (model.Operation,
             model.Operation.id == model.DataType.fk_from_operation)
        ).join(model.Algorithm).join(model.AlgorithmGroup).join(
            model.AlgorithmCategory).outerjoin(
                (model.Links,
                 and_(model.Links.fk_from_datatype == model.DataType.id,
                      model.Links.fk_to_project == project_id))
            ).outerjoin(
                model.BurstConfiguration,
                model.DataType.fk_parent_burst == model.BurstConfiguration.id
            ).filter(
                model.DataType.fk_datatype_group == None
            ).filter(
                or_(model.Operation.fk_launched_in == project_id,
                    model.Links.fk_to_project == project_id))

        if visibility_filter:
            filter_str = visibility_filter.get_sql_filter_equivalent()
            if filter_str is not None:
                # SECURITY NOTE: eval() on a filter string built elsewhere.
                # Safe only while get_sql_filter_equivalent() never embeds
                # user-controlled input; do not extend it to do so.
                query = query.filter(eval(filter_str))
        if filter_value is not None:
            query = query.filter(
                self._compose_filter_datatype_ilike(filter_value))
        resulted_data = query.all()

        ## Now query what it was not covered before:
        ## Links of DT which are part of a group, but the entire group is not linked
        links = aliased(model.Links)
        query2 = self.session.query(model.DataType).join(
            (model.Operation,
             model.Operation.id == model.DataType.fk_from_operation)
        ).join(model.Algorithm).join(model.AlgorithmGroup).join(
            model.AlgorithmCategory).join(
                (model.Links,
                 and_(model.Links.fk_from_datatype == model.DataType.id,
                      model.Links.fk_to_project == project_id))
            ).outerjoin(
                links,
                and_(links.fk_from_datatype == model.DataType.fk_datatype_group,
                     links.fk_to_project == project_id)
            ).outerjoin(
                model.BurstConfiguration,
                model.DataType.fk_parent_burst == model.BurstConfiguration.id
            ).filter(
                model.DataType.fk_datatype_group != None
            ).filter(links.id == None)

        if visibility_filter:
            filter_str = visibility_filter.get_sql_filter_equivalent()
            if filter_str is not None:
                query2 = query2.filter(eval(filter_str))
        if filter_value is not None:
            query2 = query2.filter(
                self._compose_filter_datatype_ilike(filter_value))
        resulted_data.extend(query2.all())

        # Touch lazy fields now, while the session is active, so callers
        # can still access them after the session is gone.
        for dt in resulted_data:
            dt._parent_burst
            dt.parent_operation.algorithm
            dt.parent_operation.algorithm.algo_group
            dt.parent_operation.algorithm.algo_group.group_category
            dt.parent_operation.project
            dt.parent_operation.operation_group
            dt.parent_operation.user
    # BUG FIX: was the Python-2-only ``except Exception, excep`` syntax;
    # ``as`` is valid on both Python 2.6+ and Python 3.
    except Exception as excep:
        self.logger.exception(excep)
    # BUG FIX: the function previously fell off the end returning None,
    # making the accumulated result unreachable by callers.
    return resulted_data
def add_text_search(query, join_columns, keywords, locales, include_rank=True, lse=None):
    """Add a Postgres full-text-search filter (and optionally a rank column)
    to a LangStringEntry-joined query.

    :param query: base SQLAlchemy query to extend
    :param join_columns: langstring_id columns to join LangStringEntry on
    :param keywords: search terms, AND-ed together ('a & b' tsquery syntax)
    :param locales: locales to search in; 'any' means any locale via the
        'simple' configuration; falsy means no locale restriction
    :param include_rank: when True, append a ts_rank 'score' column
    :param lse: optional pre-made LangStringEntry alias to reuse
    :returns: (query, rank) -- rank is None when include_rank is False
    """
    from assembl.models.langstrings import LangStringEntry
    rank = None
    keywords_j = ' & '.join(keywords)
    lse = lse or aliased(LangStringEntry)
    # Join on any of the given langstring columns (OR when several).
    join_conds = [
        lse.langstring_id == join_column for join_column in join_columns
    ]
    if len(join_conds) > 1:
        join_cond = or_(*join_conds)
    else:
        join_cond = join_conds[0]
    query = query.join(lse, join_cond)
    if locales:
        active_text_indices = get('active_text_indices', 'en')
        # Group requested locales by the Postgres FTS configuration that
        # will be used for them; unknown/inactive configs fall back to
        # 'simple'.
        locales_by_config = defaultdict(list)
        any_locale = 'any' in locales
        for locale in locales:
            fts_config = postgres_language_configurations.get(locale, 'simple')
            if fts_config not in active_text_indices:
                fts_config = 'simple'
            locales_by_config[fts_config].append(locale)
        conds = {}
        # TODO: to_tsquery vs plainto_tsquery vs phraseto_tsquery
        # NOTE: ``locales`` is deliberately shadowed inside this loop by the
        # per-config locale list; the outer value is no longer needed here.
        for fts_config, locales in locales_by_config.items():
            # Each entry: (locale-match condition or None for 'any',
            #              tsvector over the entry value).
            conds[fts_config] = (or_(
                *[((lse.locale == locale) | lse.locale.like(locale + "_%"))
                  for locale in locales]) if 'any' not in locales else None,
                func.to_tsvector(fts_config, lse.value))
        filter = [
            cond & v.match(keywords_j, postgresql_regconfig=conf)
            for (conf, (cond, v)) in conds.items() if cond is not None
        ]
        if any_locale:
            # 'any' searches every row through the 'simple' configuration.
            (_, v) = conds['simple']
            filter.append(v.match(keywords_j, postgresql_regconfig='simple'))
        query = query.filter(or_(*filter))
        if include_rank:
            if len(conds) > 1:
                # Rank with the configuration matching the row's locale;
                # fall back to 'simple' (or 0) for unmatched rows.
                if any_locale:
                    (_, v) = conds['simple']
                    else_case = func.ts_rank(
                        v, func.to_tsquery('simple', keywords_j))
                else:
                    else_case = 0
                rank = case(
                    [(cond, func.ts_rank(v, func.to_tsquery(conf, keywords_j)))
                     for (conf, (cond, v)) in conds.items()
                     if cond is not None],
                    else_=else_case).label('score')
            else:
                # Single configuration: no CASE needed.
                (conf, (cond, v)) = next(iter(conds.items()))
                rank = func.ts_rank(
                    v, func.to_tsquery(conf, keywords_j)).label('score')
            query = query.add_column(rank)
    else:
        # No locale restriction: search everything via 'simple'.
        fts_config = 'simple'
        query = query.filter(
            func.to_tsvector(fts_config, lse.value).match(
                keywords_j, postgresql_regconfig=fts_config))
        if include_rank:
            rank = func.ts_rank(
                func.to_tsvector(fts_config, lse.value),
                func.to_tsquery(fts_config, keywords_j)).label('score')
            query = query.add_column(rank)
    return query, rank
def WhereInJoinTable(self, query, criteriaObj):
    ''' Override parent function to include management of Observation/Protocols and fieldWorkers

    :param query: the select statement being built
    :param criteriaObj: dict with 'Column', 'Operator' and 'Value' keys
    :returns: query extended with EXISTS/IN subqueries for the special
        columns handled here (others are handled by the parent class)
    '''
    query = super().WhereInJoinTable(query, criteriaObj)
    curProp = criteriaObj['Column']

    if curProp == 'FK_ProtocoleType':
        # Station has an observation whose protocol type matches.
        o = aliased(Observation)
        subSelect = select([o.ID]).where(
            and_(
                Station.ID == o.FK_Station,
                eval_.eval_binary_expr(o._type_id, criteriaObj['Operator'],
                                       criteriaObj['Value'])))
        query = query.where(exists(subSelect))

    if curProp == 'Species':
        # Species can live either on the linked Individual or in the
        # observation's dynamic properties (names like '%taxon'/'%species%').
        obsValTable = Base.metadata.tables['ObservationDynPropValuesNow']
        o2 = aliased(Observation)
        s2 = aliased(Station)
        joinStaObs = join(s2, o2, s2.ID == o2.FK_Station)
        operator = criteriaObj['Operator']
        if 'not' in criteriaObj['Operator']:
            # Strip the negation: the whole UNION is negated at the end via
            # NOT IN, so the inner comparisons must stay positive.
            operator = operator.replace('not ', '').replace(' not', '')
        existInd = select([Individual.ID]).where(
            and_(
                o2.FK_Individual == Individual.ID,
                eval_.eval_binary_expr(Individual.Species, operator,
                                       criteriaObj['Value'])))
        existObs = select([obsValTable.c['ID']]).where(
            and_(
                obsValTable.c['FK_Observation'] == o2.ID,
                and_(
                    or_(obsValTable.c['Name'].like('%taxon'),
                        obsValTable.c['Name'].like('%species%')),
                    eval_.eval_binary_expr(obsValTable.c['ValueString'],
                                           operator,
                                           criteriaObj['Value']))))
        selectCommon = select([s2.ID]).select_from(joinStaObs)
        selectInd = selectCommon.where(exists(existInd))
        selectObs = selectCommon.where(exists(existObs))
        unionQuery = union_all(selectInd, selectObs)
        if 'not' in criteriaObj['Operator']:
            query = query.where(~Station.ID.in_(unionQuery))
        else:
            query = query.where(Station.ID.in_(unionQuery))

    if curProp == 'FK_Individual':
        if criteriaObj['Operator'].lower() in ['is null', 'is not null']:
            # Null-ness of the link: stations WITHOUT any observation that
            # has an individual (is null) vs WITH at least one (is not null).
            subSelect = select([Observation]).where(
                and_(Station.ID == Observation.FK_Station,
                     Observation.__table__.c[curProp] != None))
            # NOTE(review): comparing the full operator ('is null' /
            # 'is not null') against 'is' looks off -- presumably this was
            # meant to detect the 'is null' case; verify against callers.
            if criteriaObj['Operator'].lower() == 'is':
                query = query.where(~exists(subSelect))
            else:
                query = query.where(exists(subSelect))
        else:
            subSelect = select([Observation]).where(
                and_(
                    Station.ID == Observation.FK_Station,
                    eval_.eval_binary_expr(
                        Observation.__table__.c[curProp],
                        criteriaObj['Operator'], criteriaObj['Value'])))
            query = query.where(exists(subSelect))

    if curProp == 'FK_FieldWorker':
        # Match on the field worker's login rather than the raw FK.
        joinTable = join(Station_FieldWorker, User,
                         Station_FieldWorker.FK_FieldWorker == User.id)
        subSelect = select(
            [Station_FieldWorker]).select_from(joinTable).where(
                and_(
                    Station.ID == Station_FieldWorker.FK_Station,
                    eval_.eval_binary_expr(User.__table__.c['Login'],
                                           criteriaObj['Operator'],
                                           criteriaObj['Value'])))
        query = query.where(exists(subSelect))

    if curProp == 'LastImported':
        # Keep only the most recently created station(s): no other station
        # may have a strictly later creation date.
        st = aliased(Station)
        subSelect2 = select([st]).where(
            cast(st.creationDate, DATE) > cast(Station.creationDate, DATE))
        query = query.where(~exists(subSelect2))
    return query
# BUG FIX: ``Enum`` is used by the Filter class below but was never
# imported in this module's import block (compare: no other import here
# provides it), which would raise NameError at import time.
from enum import Enum

from sqlalchemy.orm import Session, aliased
from sqlalchemy.orm.query import Query as RawQuery
from sqlalchemy.sql.expression import or_

from ..models import (
    DBID,
    Issue,
    IssueInstance,
    IssueInstanceSharedTextAssoc,
    SharedText,
    SharedTextKind,
)

# One alias of SharedText per text kind so several kinds can be joined
# in the same query without colliding.
# pyre-fixme[5]: Global expression must be annotated.
FilenameText = aliased(SharedText)
# pyre-fixme[5]: Global expression must be annotated.
CallableText = aliased(SharedText)
# pyre-fixme[5]: Global expression must be annotated.
CallerText = aliased(SharedText)
# pyre-fixme[5]: Global expression must be annotated.
CalleeText = aliased(SharedText)
# pyre-fixme[5]: Global expression must be annotated.
MessageText = aliased(SharedText)


class Filter(Enum):
    """Dimensions an issue query can be filtered on."""

    codes = "codes"
    callables = "callables"
    file_names = "file_names"
    trace_length_to_sources = "trace_length_to_sources"
def get_area_map_query(self):
    """Build a query that flattens the full administrative area hierarchy.

    Returns one row per (country, electoral district, polling division,
    polling district, polling station, counting centre) chain, joined
    through the AreaAreaModel parent/child edges up to the election
    commission, together with the election's vote type.
    """
    from orm.entities import Election, Area
    from orm.entities.Area import AreaAreaModel
    from orm.enums import AreaTypeEnum

    def area_of_type(area_type):
        # One subquery per hierarchy level, selecting areas of that type.
        return db.session.query(Area.Model).filter(
            Area.Model.areaType == area_type).subquery()

    country = area_of_type(AreaTypeEnum.Country)
    electoral_district = area_of_type(AreaTypeEnum.ElectoralDistrict)
    polling_division = area_of_type(AreaTypeEnum.PollingDivision)
    polling_district = area_of_type(AreaTypeEnum.PollingDistrict)
    polling_station = area_of_type(AreaTypeEnum.PollingStation)
    counting_centre = area_of_type(AreaTypeEnum.CountingCentre)
    district_centre = area_of_type(AreaTypeEnum.DistrictCentre)
    election_commission = area_of_type(AreaTypeEnum.ElectionCommission)

    # One association-table alias per parent->child edge in the hierarchy.
    country__electoral_district = aliased(AreaAreaModel)
    electoral_district__polling_division = aliased(AreaAreaModel)
    polling_division__polling_district = aliased(AreaAreaModel)
    polling_district__polling_station = aliased(AreaAreaModel)
    counting_centre__polling_station = aliased(AreaAreaModel)
    district_centre__counting_centre = aliased(AreaAreaModel)
    election_commission__district_centre = aliased(AreaAreaModel)

    # Edge conditions wiring every level to its parent, plus the link from
    # counting centre to its election.
    edge_conditions = [
        country__electoral_district.parentAreaId == country.c.areaId,
        country__electoral_district.childAreaId ==
        electoral_district.c.areaId,
        electoral_district__polling_division.parentAreaId ==
        electoral_district.c.areaId,
        electoral_district__polling_division.childAreaId ==
        polling_division.c.areaId,
        polling_division__polling_district.parentAreaId ==
        polling_division.c.areaId,
        polling_division__polling_district.childAreaId ==
        polling_district.c.areaId,
        polling_district__polling_station.parentAreaId ==
        polling_district.c.areaId,
        polling_district__polling_station.childAreaId ==
        polling_station.c.areaId,
        counting_centre__polling_station.parentAreaId ==
        counting_centre.c.areaId,
        counting_centre__polling_station.childAreaId ==
        polling_station.c.areaId,
        district_centre__counting_centre.parentAreaId ==
        district_centre.c.areaId,
        district_centre__counting_centre.childAreaId ==
        counting_centre.c.areaId,
        election_commission__district_centre.parentAreaId ==
        election_commission.c.areaId,
        election_commission__district_centre.childAreaId ==
        district_centre.c.areaId,
        Election.Model.electionId == counting_centre.c.electionId,
    ]

    query = db.session.query(
        country.c.areaId.label("countryId"),
        country.c.areaName.label("countryName"),
        electoral_district.c.areaId.label("electoralDistrictId"),
        electoral_district.c.areaName.label("electoralDistrictName"),
        polling_division.c.areaId.label("pollingDivisionId"),
        polling_division.c.areaName.label("pollingDivisionName"),
        polling_district.c.areaId.label("pollingDistrictId"),
        polling_district.c.areaName.label("pollingDistrictName"),
        polling_station.c.areaId.label("pollingStationId"),
        polling_station.c.areaName.label("pollingStationName"),
        counting_centre.c.areaId.label("countingCentreId"),
        counting_centre.c.areaName.label("countingCentreName"),
        Election.Model.voteType).filter(*edge_conditions)

    return query
def test_aliased_class(self):
    """A path coerced from an aliased class reports an aliased parent."""
    alias = aliased(self.classes.User)
    insp = inspect(alias)
    coerced = PathRegistry.coerce((insp, insp.mapper.attrs.addresses))
    assert coerced.parent.is_aliased_class
def _join_snapshots(cls, id_):
    """Retrieves related objects with snapshots

    Performs a query where it first:
    1) Find all directly mapped snapshots
    2) Join with snapshots to find type and id of snapshots
       (child_type and child_id) - left snapshots
    3) Join with snapshots to find snapshots with the same child_type
       and child_id (right_snapshots)
    4) Find all objects mapped to right snapshots (right_relationships)

    Arg:
      id_: ID of instance performing similarity query on
    Return:
      [(related_type, similar_id, similar_type)] where related type is
      the type related objects, similar_id and similar_type being id and
      type of second tier objects.
    """
    left_snapshot = aliased(Snapshot, name="left_snapshot")
    right_snapshot = aliased(Snapshot, name="right_snapshot")
    left_relationship = aliased(Relationship, name="left_relationship")
    right_relationship = aliased(Relationship, name="right_relationship")

    # Step 1: IDs of snapshots directly related to (cls, id_), whether the
    # instance is the source or the destination of the relationship.
    snapshot_ids = select([
        left_relationship.destination_id.label("snapshot_left_id"),
    ]).where(
        and_(
            left_relationship.source_type == cls.__name__,
            left_relationship.source_id == id_,
            left_relationship.destination_type == "Snapshot")).union(
                select([
                    left_relationship.source_id.label("snapshot_left_id"),
                ]).where(
                    and_(
                        left_relationship.destination_type == cls.__name__,
                        left_relationship.destination_id == id_,
                        left_relationship.source_type ==
                        "Snapshot"))).alias("snapshot_ids")

    # Step 2: resolve those IDs to their snapshot rows (left snapshots).
    left_snapshot_join = snapshot_ids.outerjoin(
        left_snapshot, left_snapshot.id == snapshot_ids.c.snapshot_left_id)
    # Step 3: all snapshots of the same underlying object (same child
    # type/id) as any left snapshot. Column names below
    # (right_snapshot_id, right_snapshot_child_type) come from the
    # alias-name prefixing applied when this join is aliased.
    right_snapshot_join = left_snapshot_join.outerjoin(
        right_snapshot,
        and_(right_snapshot.child_type == left_snapshot.child_type,
             right_snapshot.child_id == left_snapshot.child_id)).alias(
                 "right_snapshot_join")

    # Step 4: two symmetric queries for objects of this class mapped to the
    # right snapshots -- one where the object is the relationship source,
    # one where it is the destination.
    return [
        db.session.query(
            right_snapshot_join.c.right_snapshot_child_type.label(
                "related_type"),
            right_relationship.source_id.label("similar_id"),
            right_relationship.source_type.label("similar_type"),
        ).filter(
            and_(
                right_relationship.destination_type == "Snapshot",
                right_relationship.destination_id ==
                right_snapshot_join.c.right_snapshot_id,
                right_relationship.source_type == cls.__name__)),
        db.session.query(
            right_snapshot_join.c.right_snapshot_child_type.label(
                "related_type"),
            right_relationship.destination_id.label("similar_id"),
            right_relationship.destination_type.label("similar_type"),
        ).filter(
            and_(
                right_relationship.source_type == "Snapshot",
                right_relationship.source_id ==
                right_snapshot_join.c.right_snapshot_id,
                right_relationship.destination_type == cls.__name__))
    ]
def filter_query(request: Request, query: Query, Model: RestalchemyBase,
                 filter_by: str, value: str) -> Query:
    """Apply one URL filter (``filter_by=value``) to *query* on *Model*.

    Supports suffix modifiers on the filter name (``!`` negate, ``<`` less-
    equal, ``>`` greater-equal, trailing ``_`` to dodge reserved words),
    dotted names for joined models (up to two joins), comma lists (IN),
    ``*`` wildcards (LIKE), and the literal string ``null`` (IS NULL).

    :raises AttributeNotFound: unknown model or attribute name
    :raises AttributeWrong: a dotted join could not be resolved
    :raises FilterInvalid: modifier combination not allowed for the value
    :returns: the query with the corresponding filter applied
    """
    negate = filter_by.endswith("!")  # negate filter when last char is !
    less_equal = filter_by.endswith("<")  # use less equal when last char is <
    greater_equal = filter_by.endswith(
        ">")  # use greater equal when last char is >
    filter_by = filter_by.rstrip("!<>")

    # When a filter ends with an underscore, simply ignore it.
    # Appending '_' is allowed to avoid a conflict with a reserved word like
    # 'limit' or 'offset' etc.
    if filter_by.endswith("_"):
        filter_by = filter_by[:-1]

    # FilterModel is the model who's attribute will be used for the filter
    FilterModel: RestalchemyBase = Model
    if "." in filter_by:
        # when filtered by a full name assume a join
        model2_name, filter_by = filter_by.split(".", 1)
        Model2 = request.restalchemy_get_model(model2_name)
        if not Model2:
            raise AttributeNotFound(model2_name)
        if Model != Model2:
            try:
                FilterModel = Model2 = aliased(Model2)  # type: ignore
                # LEFT JOIN so you can query `Model2.attr='null'`
                query = query.outerjoin(Model2)
                # FIXME: specify join argument like
                # ``query = query.outerjoin(Model2, Model.model2_name)``
                # otherwise sqla can't find the join with some multiple filters like:
                # `/v3/creatives?bans.reason=null&advertiser.network_id!=9&sort=quickstats.today.clicks.desc`

                # Allow 1 more join to filter stuff like /campaigns?profile.segments_filter.segments_id=14
                if "." in filter_by:
                    model3_name, filter_by = filter_by.split(".", 1)
                    Model3 = request.restalchemy_get_model(model3_name)
                    if not Model3:
                        raise AttributeNotFound(model3_name)
                    # Join Model 3 with Model2 (SQLAlchemy knows how)
                    # and filter attribute on Model3
                    if Model3 not in [Model, Model2]:
                        FilterModel = aliased(Model3)  # type: ignore
                        query = query.outerjoin(FilterModel)
            except InvalidRequestError:
                raise AttributeWrong(model2_name)

    # FIXME: ???? validate list (otherwise DBAPIError is raised)!
    try:
        filter_attr = getattr(FilterModel, filter_by)
    except AttributeError:
        raise AttributeNotFound(filter_by)

    # If filter_attr is n to m relationship, the value is always `IN` and
    # we have to join the secondary table.
    if isinstance(filter_attr, InstrumentedAttribute) and hasattr(
            filter_attr.property, "secondary"):
        # BUG FIX: this call was misspelled ``restalchey_get_model``, while
        # every other call in this function uses ``restalchemy_get_model``;
        # the misspelling would raise AttributeError on the request object.
        target: Any = request.restalchemy_get_model(
            filter_attr.property.target.name)
        # Without `DISTINCT` models matching multiple filter values will return multiple
        # rows which sqlalchemy combines to one, resulting in less rows total then
        # the specified `limit` rows
        query = query.outerjoin(filter_attr)
        if isinstance(value, str) and value.lower() == "null":
            if negate:
                return query.filter(target.id != None)
            else:
                return query.filter(target.id == None)
        elif negate:
            return query.filter(target.id.notin_(value.split(","))).distinct()
        else:
            return query.filter(target.id.in_(value.split(","))).distinct()

    # else
    if isinstance(value, str) and "," in value:
        # Comma-separated values: IN / NOT IN semantics.
        if less_equal or greater_equal:
            raise FilterInvalid(
                msg="Less or greater equal only allowed with single values.")
        if negate:
            return query.filter(filter_attr.notin_(value.split(",")))
        return query.filter(filter_attr.in_(value.split(",")))

    if isinstance(value, str) and "*" in value:
        # Wildcard match: translate '*' to SQL '%' and use LIKE.
        if less_equal or greater_equal:
            raise FilterInvalid(
                msg="Less or greater equal is not allowed for wildcards (`*`)."
            )
        validate(value, filter_attr)
        value = value.replace("*", "%")
        if negate:
            return query.filter(~getattr(FilterModel, filter_by).like(value))
        else:
            return query.filter(getattr(FilterModel, filter_by).like(value))

    if isinstance(value, str) and value.lower() == "null":
        value = None  # type: ignore

    validate(value, filter_attr)
    if negate:
        return query.filter(filter_attr != value)
    if less_equal:
        return query.filter(filter_attr <= value)
    if greater_equal:
        return query.filter(filter_attr >= value)
    return query.filter(filter_attr == value)