def filterEvents(query, enames=None, lvls=None, lvlsets=None, workers=None,
                 assignments=None, typ=None):
    """Narrow an Event query with optional criteria.

    Each argument that is not None adds an IN-filter on the matching
    column or JSON payload field; ``typ`` adds an equality filter on the
    event type. The (possibly extended) query is returned.
    """
    optional_in_filters = (
        (enames, Event.experiment),
        (lvls, func.json_extract(Event.payload, "$.lvlid")),
        (lvlsets, func.json_extract(Event.payload, "$.lvlset")),
        (workers, func.json_extract(Event.payload, "$.workerId")),
        (assignments, func.json_extract(Event.payload, "$.assignmentId")),
    )
    for values, column in optional_in_filters:
        if values is not None:
            query = query.filter(column.in_(values))
    if typ is not None:
        query = query.filter(Event.type == typ)
    return query
def get_balance(self):
    """Compute and cache the wallet owner's balance.

    On the first call for this owner, scans blocks whose JSON transaction
    data mentions the owner, sums amounts sent (subtracted) and received
    (added), and memoizes the result in the class-level ``Wallet.ledger``.
    Subsequent calls return the cached value.
    """
    if self.owner not in Wallet.ledger:
        balance = 0
        # LIKE pattern %"<owner>"% — matches the quoted owner name inside
        # the JSON array strings returned by json_extract below.
        q = '%%"%s"%%' % self.owner
        blocks = self.session.query(
            func.json_extract(Block.data, '$.transactions[*].to'),
            func.json_extract(Block.data, '$.transactions[*].from'),
            func.json_extract(Block.data, '$.transactions[*].amount'),
        ).filter(
            or_(
                func.json_extract(Block.data, '$.transactions[*].to').like(q),
                func.json_extract(
                    Block.data, '$.transactions[*].from').like(q))).all()
        for block in blocks:
            # Each extracted column is a JSON array string; decode and zip
            # them back into per-transaction records.
            _t, _f, _a = block
            _t = json.loads(_t)
            _f = json.loads(_f)
            _a = json.loads(_a)
            data = [{
                't': t,
                'f': f,
                'a': a
            } for t, f, a in zip(_t, _f, _a)]
            for transaction in data:
                if transaction['f'] == self.owner:
                    balance -= transaction['a']
                elif transaction['t'] == self.owner:
                    balance += transaction['a']
        Wallet.ledger[self.owner] = balance
    return Wallet.ledger[self.owner]
def get_crawlingtask_info(objectid: str, PIC: str, actionid: str):
    """Return the newest crawling task for an object/PIC/action combination.

    Selects the task id, object id, target URL (extracted from the JSON
    task detail), the ``when_exists`` flag, and the task status. Returns
    None when no task matches.
    """
    # ARTIST_ALBUM_IMAGE tasks store their target under "url"; other
    # actions store it under "youtube_url".
    if actionid == V4CrawlingTaskActionMaster.ARTIST_ALBUM_IMAGE:
        url = "url"
    else:
        url = "youtube_url"
    get_crawlingtask_info = (db_session.query(
        Crawlingtask.id,
        Crawlingtask.objectid,
        func.json_extract(Crawlingtask.taskdetail, f"$.{url}").label("url"),
        func.json_extract(Crawlingtask.taskdetail, "$.when_exists").label("when_exists"),
        Crawlingtask.status)
        .select_from(Crawlingtask)
        .filter(Crawlingtask.objectid == objectid,
                Crawlingtask.actionid == actionid,
                func.json_extract(Crawlingtask.taskdetail, "$.PIC") == PIC,
                # to delete
                # func.json_extract(Crawlingtask.taskdetail, "$.data_source_format_id") == "1A67A5F1E0D84FB9B48234AE65086375",
                )
        .order_by(Crawlingtask.created_at.desc())
    ).first()
    return get_crawlingtask_info
def get_one_youtube_url_and_youtube_uploader_by_youtube_url(youtube_url: str):
    """Return the newest valid DataSource matching ``youtube_url``.

    Requires the JSON info to contain a source uploader and a real source
    title (placeholder titles stored as "_" are excluded).

    Returns:
        list: at most one DataSource row (newest first).
    """
    return (db_session.query(DataSource)
            .filter(DataSource.valid == 1,
                    # .isnot(None) renders IS NOT NULL — same SQL as the
                    # previous `!= None`, but the idiomatic SQLAlchemy form
                    # (matches the style used elsewhere in this codebase).
                    func.json_extract(DataSource.info,
                                      "$.source.uploader").isnot(None),
                    func.json_extract(DataSource.info,
                                      "$.source.title").isnot(None),
                    DataSource.source_uri == youtube_url,
                    func.json_extract(DataSource.info,
                                      "$.source.title") != "_")
            .order_by(DataSource.created_at.desc())
            .limit(1).all())
def events(session, **kwargs):
    """Query Event rows plus the common payload fields, filtered via
    filterEvents (kwargs are forwarded unchanged). Returns all rows."""
    payload_columns = [
        func.json_extract(Event.payload, "$.%s" % field).label(field)
        for field in ("lvlid", "lvlset", "workerId", "assignmentId")
    ]
    query = session.query(Event, *payload_columns)
    return filterEvents(query, **kwargs).all()
def get_track_wiki(trackid: tuple):
    """Build a query for wiki info (url and brief) of the given track ids.

    Returns the unexecuted SQLAlchemy query.
    """
    wiki_url_col = func.json_extract(Track.info, "$.wiki_url").label("wiki_url")
    wiki_brief_col = func.json_extract(Track.info,
                                       "$.wiki.brief").label("wiki_content")
    query = db_session.query(
        Track.id, Track.title, Track.artist, wiki_url_col, wiki_brief_col,
    ).select_from(Track)
    return query.filter(Track.id.in_(trackid))
def get_album_wiki(albumuuid: tuple):
    """Build a query for wiki info (url and brief) of the given album uuids.

    Returns the unexecuted SQLAlchemy query.
    """
    wiki_url_col = func.json_extract(Album.info, "$.wiki_url").label("wiki_url")
    wiki_brief_col = func.json_extract(Album.info,
                                       "$.wiki.brief").label("wiki_content")
    query = db_session.query(
        Album.uuid, Album.title, Album.artist, wiki_url_col, wiki_brief_col,
    ).select_from(Album)
    return query.filter(Album.uuid.in_(albumuuid))
def generate_json_expansion_values(properties):
    """Build the paired (property, aggregation) JSON-extraction columns.

    For each position i in ``properties`` produces two labelled columns:
    ``prop_i`` extracted from the properties JSON array and ``agg_i``
    extracted (and unquoted) from the aggregations JSON array.
    """
    columns = []
    for position, _ in enumerate(properties):
        json_path = f"$[{position}]"
        columns.extend([
            func.json_extract(metric_properties_j.properties,
                              json_path).label(f"prop_{position}"),
            func.json_unquote(
                func.json_extract(metric_aggregations_j.aggregations,
                                  json_path)).label(f"agg_{position}"),
        ])
    return columns
def get_youtube_title_and_youtube_uploader_from_youtube_url(youtube_url: str):
    """Build a query for the title/uploader stored in valid DataSource rows
    whose source_uri equals ``youtube_url``.

    Returns the unexecuted SQLAlchemy query.

    NOTE(review): group_by(track_id) with non-aggregated selected columns
    relies on MySQL's loose GROUP BY semantics — confirm the target DB.
    """
    datasourceid = (db_session.query(
        DataSource.source_uri.label('youtube_url'),
        func.json_extract(DataSource.info,
                          "$.source.title").label("youtube_title"),
        func.json_extract(DataSource.info,
                          "$.source.uploader").label("youtube_uploader"))
        .select_from(DataSource)
        .filter(DataSource.source_uri == youtube_url,
                DataSource.valid == 1)
    ).group_by(DataSource.track_id).order_by(DataSource.created_at.desc())
    return datasourceid
def postpositional_case(conn, joined, *ands):
    """Count inflection-type MWEs whose token has a Case feature that is
    set but is none of partitive ("Par"), genitive ("Gen"), or
    nominative ("Nom"). Extra clauses in ``ands`` are ANDed in.

    Returns the scalar count.
    """
    return conn.execute(
        select([func.count(tables["ud_mwe"].c.id)]).where(
            and_(
                tables["ud_mwe"].c.typ == MweType.inflection,
                # Case must be present in the token's JSON feats ...
                func.json_extract(tables["ud_mwe_token"].c.feats,
                                  "$.Case").isnot(None),
                # ... and must not be one of the excluded cases.
                func.json_extract(tables["ud_mwe_token"].c.feats,
                                  "$.Case") != "Par",
                func.json_extract(tables["ud_mwe_token"].c.feats,
                                  "$.Case") != "Gen",
                func.json_extract(tables["ud_mwe_token"].c.feats,
                                  "$.Case") != "Nom",
                *ands)).select_from(joined_token(joined))).scalar()
def get_lifecycle_state_details(self, session, resource_type_input):
    """Count of lifecycle states of the specified resources.

    Generate/return the count of lifecycle states (ACTIVE, DELETE_PENDING)
    of the specific resource type input (project, folder) for this
    inventory index.

    Args:
        session (object) : session object to work on.
        resource_type_input (str) : resource type to get lifecycle states.

    Returns:
        dict: a (lifecycle state -> count) dictionary
    """
    resource_data = Inventory.resource_data
    # Group resources of the requested type (within this inventory index)
    # by the lifecycleState field embedded in their JSON resource data.
    details = dict(
        session.query(
            func.json_extract(resource_data, '$.lifecycleState'),
            func.count()).filter(Inventory.inventory_index_id == self.id).
        filter(Inventory.category == 'resource').filter(
            Inventory.resource_type == resource_type_input).group_by(
                func.json_extract(resource_data, '$.lifecycleState')).all())
    LOGGER.debug('Lifecycle details for %s:\n%s',
                 resource_type_input, details)
    # Lifecycle can be None if Forseti is installed to a non-org level.
    for key in list(details.keys()):
        if key is None:
            continue
        # json_extract returns quoted values such as '"DELETE_PENDING"';
        # strip quotes/underscores and prefix the resource type, e.g.
        # 'project - DELETE PENDING'.
        new_key = key.replace('\"', '').replace('_', ' ')
        new_key = ' - '.join([resource_type_input, new_key])
        details[new_key] = details.pop(key)
    # Only a None key remained: no usable lifecycle data at all.
    if len(details) == 1 and list(details.keys())[0] is None:
        return {}
    if len(details) == 1:
        # If the lifecycle state is DELETE PENDING or
        # LIFECYCLE STATE UNSPECIFIED the added_key_string
        # will be RESOURCE_TYPE - ACTIVE, which is then set
        # to 0.
        added_key_str = 'ACTIVE'
        if 'ACTIVE' in list(details.keys())[0]:
            added_key_str = 'DELETE PENDING'
        added_key = ' - '.join([resource_type_input, added_key_str])
        details[added_key] = 0
    return details
def get_youtube_info_from_trackid(track_ids: list, format_id):
    """Build a query for the YouTube url/title/uploader of valid DataSource
    rows matching the given track ids and format id.

    Returns the unexecuted SQLAlchemy query.

    NOTE(review): group_by(track_id) with non-aggregated selected columns
    relies on MySQL's loose GROUP BY semantics — confirm the target DB.
    """
    datasourceid = (db_session.query(
        DataSource.track_id.label('track_id'),
        DataSource.id.label('datasource_id'),
        DataSource.source_uri.label('youtube_url'),
        func.json_extract(DataSource.info,
                          "$.source.title").label("youtube_title"),
        func.json_extract(DataSource.info,
                          "$.source.uploader").label("youtube_uploader"))
        .select_from(DataSource)
        .filter(DataSource.track_id.in_(track_ids),
                DataSource.valid == 1,
                DataSource.format_id == format_id)
    ).group_by(DataSource.track_id).order_by(DataSource.created_at.desc())
    return datasourceid
def verified_by_worker(lvl, worker, exp):
    """Return whether ``worker`` proved level ``lvl`` in experiment ``exp``
    according to the individual-mode VerifyData entries.

    Asserts internal consistency: if no entry exists, the worker must also
    have no InvariantFound events; otherwise exactly one entry must exist.
    """
    s = session.query(VerifyData)\
        .filter(VerifyData.lvl == lvl)\
        .filter(func.json_extract(VerifyData.config, "$.mode") == "individual")\
        .filter(func.json_extract(VerifyData.config, "$.enames[0]") == exp)\
        .filter(func.json_extract(VerifyData.payload, "$.workers[0]") == worker)
    vs = s.all()
    if (len(vs) == 0):
        # No individual verification entry: consistent only if the worker
        # also found no invariants for this level.
        assert len(
            events(session, typ='InvariantFound', lvls=[lvl],
                   workers=[worker])) == 0
        return False
    assert (len(vs) == 1), "Not 1 VerifyData entry for {}, {}, {}".format(
        lvl, worker, exp)
    return vs[0].provedflag
def find(cls, publish=None, key_value=None, has_key=None):
    '''
    Return PublishMetadata instances by query arguments

    Args:
        publish (Publish) : PublishMetadata parent Publish instance.
        key_value (Tuple) : PublishMetadata match {'key' : 'value}.
        has_key (str) : PublishMetadata has 'key'.

    Returns:
        A list of PublishMetadata instances matching find arguments.
    '''
    query = cls.query()
    if publish:
        cls.assert_isinstance(publish, 'Publish')
        query = query.filter(cls.publish_id == publish.id)
    if key_value:
        assert isinstance(key_value, tuple), (
            'key_value must be a tuple. Given {}'.format(type(key_value)))
        # Match metadata[key] == value; the value is cast to JSON so the
        # comparison is against the stored JSON representation.
        query = query.filter(
            PublishMetadata.metadata[key_value[0]] == cast(key_value[1], JSON))
    if has_key:
        # .isnot(None) renders IS NOT NULL — the idiomatic SQLAlchemy form
        # of the previous `!= None` comparison (same generated SQL).
        query = query.filter(
            func.json_extract(PublishMetadata.metadata,
                              '$."{}"'.format(has_key)).isnot(None))
    return query.all()
def get_crawl_E5_06_status(ituneid: list):
    """Build a query pairing today's '06' album tasks with their linked 'E5'
    track tasks, keyed by the iTunes album id stored in the task detail.

    Returns the unexecuted SQLAlchemy query.
    """
    crawlingtask_alias = aliased(Crawlingtask, name='crawlingtask_alias')
    crawl_E5_06_status = (
        db_session.query(
            func.json_unquote(func.json_extract(
                Crawlingtask.taskdetail, "$.album_id")).label("ituneid"),
            Crawlingtask.status.label('06_status'),
            crawlingtask_alias.status.label('E5_status'))
        .select_from(Crawlingtask)
        # Raw-SQL outer join through the JSON ext column.
        # NOTE(review): the text() references "Crawlingtasks.ext" (capital C,
        # plural) — confirm this matches the actual table name in the DB.
        .outerjoin(crawlingtask_alias,
                   text("crawlingtask_alias.id = Crawlingtasks.ext ->> '$.itunes_track_task_id'"))
        .filter(func.DATE(Crawlingtask.created_at) == func.current_date(),
                Crawlingtask.actionid == '9C8473C36E57472281A1C7936108FC06',
                func.json_extract(Crawlingtask.taskdetail,
                                  "$.album_id").in_(ituneid))
    )
    return crawl_E5_06_status
def get_crawl_image_status(gsheet_name: str, sheet_name: str):
    """Build a query for image-crawl tasks tagged with the PIC value
    "<gsheet_name>_<sheet_name>" for the image-crawl action id.

    Returns the unexecuted SQLAlchemy query, newest task first per object.
    """
    crawl_artist_image_status = (db_session.query(
        Crawlingtask.id,
        Crawlingtask.actionid,
        Crawlingtask.objectid,
        Crawlingtask.taskdetail,
        Crawlingtask.status)
        .select_from(Crawlingtask)
        .filter(func.json_extract(Crawlingtask.taskdetail,
                                  "$.PIC") == f"{gsheet_name}_{sheet_name}",
                Crawlingtask.actionid == 'OA9CPKSUT6PBGI1ZHPLQUPQCGVYQ71S9')
        .order_by(Crawlingtask.objectid, Crawlingtask.created_at.desc())
    )
    return crawl_artist_image_status
def _json_path_clause(kv_pair):
    """Build an AND clause matching a Variable by key and JSON-path value.

    Args:
        kv_pair: a (path_expr, value) pair; _parse_path_expr splits the
            path expression into the variable key and the JSON path.

    Returns:
        A SQLAlchemy clause: json_contains(json_extract(value, path), value)
        ANDed with the key equality.
    """
    path_expr, value = kv_pair
    key, path = _parse_path_expr(path_expr)
    json_match = sa_func.json_contains(
        sa_func.json_extract(models.Variable.value, path), value)
    # NOTE(thomasem): Order is important here. MySQL will short-circuit and
    # not properly validate the JSON Path expression when the key doesn't
    # exist if the key match is first in the and_(...) expression. So, let's
    # put the json_match first.
    return and_(json_match, models.Variable.key == key)
def get_youtube_crawlingtask_info(track_id: str, PIC: str, format_id: str):
    """Return the newest YouTube-download task for a track/PIC/format.

    Selects the task id, object id, youtube_url and when_exists (extracted
    from the JSON task detail), and the status. Tasks with priority 10000
    are excluded (presumably a sentinel priority — TODO confirm).
    Returns None when nothing matches.
    """
    get_crawlingtask_info = (db_session.query(
        Crawlingtask.id,
        Crawlingtask.objectid,
        # Plain string literal: the original used an f-string with no
        # placeholders here.
        func.json_extract(Crawlingtask.taskdetail,
                          "$.youtube_url").label("youtube_url"),
        func.json_extract(Crawlingtask.taskdetail,
                          "$.when_exists").label("when_exists"),
        Crawlingtask.status)
        .select_from(Crawlingtask)
        .filter(Crawlingtask.objectid == track_id,
                Crawlingtask.actionid == V4CrawlingTaskActionMaster.DOWNLOAD_VIDEO_YOUTUBE,
                func.json_extract(Crawlingtask.taskdetail, "$.PIC") == PIC,
                func.json_extract(Crawlingtask.taskdetail,
                                  "$.data_source_format_id") == format_id,
                Crawlingtask.priority != 10000)
        .order_by(Crawlingtask.created_at.desc())
    ).first()
    return get_crawlingtask_info
def prog_exp(worker):
    """Return the average self-reported programming experience for
    ``worker``, rounded to the nearest int, or None when the worker has no
    survey entries.
    """
    scores = session.query(
        func.json_extract(
            SurveyData.payload,
            '$.prog_experience')).filter(SurveyData.worker == worker).all()
    if not scores:
        return None
    # Each row is a 1-tuple containing the extracted value; `* 1.0` forces
    # float division (keeps Python 2 compatibility).
    ave_prog_exp = sum(int(score[0]) for score in scores) * 1.0 / len(scores)
    return int(round(ave_prog_exp))
def get_crawlingtask_youtube_info(objectid: str, PIC: str, actionid: str):
    """Return the newest crawling task for the given object/PIC/action.

    Selects the task id, object id, youtube_url, when_exists and
    data_source_format_id (all extracted from the JSON task detail), plus
    the status. Returns None when no task matches.
    """
    get_crawlingtask_info = (db_session.query(
        Crawlingtask.id,
        Crawlingtask.objectid,
        func.json_extract(Crawlingtask.taskdetail,
                          "$.youtube_url").label("youtube_url"),
        func.json_extract(Crawlingtask.taskdetail,
                          "$.when_exists").label("when_exists"),
        func.json_extract(Crawlingtask.taskdetail,
                          "$.data_source_format_id").label("data_source_format_id"),
        Crawlingtask.status)
        .select_from(Crawlingtask)
        .filter(Crawlingtask.objectid == objectid,
                Crawlingtask.actionid == actionid,
                func.json_extract(Crawlingtask.taskdetail, "$.PIC") == PIC,
                )
        .order_by(Crawlingtask.created_at.desc())
    ).first()
    return get_crawlingtask_info
def __getitem__(self, index):
    """Translate ``index`` into a JSON path and wrap it in json_extract.

    Tuples build a nested path ("[i]" for ints, '."name"' otherwise);
    a bare int indexes an array; anything else is treated as an object key.
    """
    if isinstance(index, tuple):
        segments = []
        for elem in index:
            if isinstance(elem, int):
                segments.append("[%s]" % elem)
            else:
                segments.append('."%s"' % elem)
        path = "$%s" % "".join(segments)
    elif isinstance(index, int):
        path = "$[%s]" % index
    else:
        path = '$."%s"' % index
    # json_extract does not appear to return JSON sub-elements,
    # which is weird.
    return func.json_extract(self.expr, path, type_=NullType)
def get_s11_crawlingtask_info(pic: str):
    """Build a query pairing '06' album tasks (by PIC) with their linked
    'E5' track tasks via the itunes_track_task_id stored in the JSON ext
    column. Returns the unexecuted SQLAlchemy query, newest first.
    """
    # JOIN the same table twice using aliases.
    crawlingtasks_06 = aliased(Crawlingtask, name='crawlingtasks_06')
    crawlingtasks_E5 = aliased(Crawlingtask, name='crawlingtasks_e5')
    s11_crawlingtask_info = (db_session.query(
        # Plain string literal: the original used an f-string with no
        # placeholders here.
        func.json_extract(crawlingtasks_06.taskdetail,
                          "$.album_id").label("itune_album_id"),
        crawlingtasks_06.id.label("06_id"),
        crawlingtasks_06.status.label("06_status"),
        crawlingtasks_E5.id.label("e5_id"),
        crawlingtasks_E5.status.label("e5_status"))
        .select_from(crawlingtasks_06)
        # Raw-SQL join kept deliberately: the equivalent
        # func.json_extract() join had a query-performance problem.
        .outerjoin(
            crawlingtasks_E5,
            text("crawlingtasks_E5.id = crawlingtasks_06.ext ->> '$.itunes_track_task_id'"))
        ).filter(
            crawlingtasks_06.actionid == "9C8473C36E57472281A1C7936108FC06",
            func.json_extract(crawlingtasks_06.taskdetail, "$.PIC") == pic,
        ).order_by(crawlingtasks_06.created_at.desc())
    return s11_crawlingtask_info
def get_datasourceId_from_crawlingtask():
    """Build a query joining today's YouTube-download crawling tasks to any
    DataSource rows they produced, grouped per (object, url, format).

    Returns the unexecuted SQLAlchemy query.
    """
    # Raw-SQL join: the DataSource must match the task's object id, the
    # youtube_url and data_source_format_id stored in the JSON task detail,
    # and be valid.
    conditions2 = "datasources.TrackId = crawlingtasks.ObjectId and datasources.SourceURI = crawlingtasks.TaskDetail ->> '$.youtube_url' and datasources.FormatID = crawlingtasks.TaskDetail ->> '$.data_source_format_id' and datasources.Valid = 1"
    record = (db_session.query(
        Crawlingtask.objectid.label("track_id"),
        DataSource.format_id,
        func.json_extract(Crawlingtask.taskdetail,
                          "$.youtube_url").label("youtube_url"),
        DataSource.id.label("datasource_id"),
        Crawlingtask.status.label("crawler_status"),
        Crawlingtask.id.label("crawlingtask_id"),
        func.json_extract(Crawlingtask.ext, "$.message").label("message"))
        .select_from(Crawlingtask)
        .outerjoin(DataSource, text(conditions2))
        .filter(
            func.DATE(Crawlingtask.created_at) == func.current_date(),
            Crawlingtask.actionid == 'F91244676ACD47BD9A9048CF2BA3FFC1',
            Crawlingtask.priority.in_([999, 10000]))
        .group_by(Crawlingtask.objectid,
                  func.json_extract(Crawlingtask.taskdetail, "$.youtube_url"),
                  func.json_extract(Crawlingtask.taskdetail,
                                    "$.data_source_format_id"))
    )
    return record
def verified_up_to_exp(lvl, exp, exp_typ, experiment):
    """Return whether level ``lvl`` was solved by combining all invariants
    from workers whose self-reported experience is at most ``exp``.

    ``exp_typ`` selects the experience kind ('math' or 'prog'); the
    matching VerifyData entries are tagged '<typ>-exp-le-<exp>' in
    combined mode.

    BUG FIX: the body previously referenced an undefined name ``typ``
    instead of the ``exp_typ`` parameter, raising NameError on every call.
    """
    assert exp_typ in ['math', 'prog']
    tag = '{}-exp-le-{}'.format(exp_typ, exp)
    exp_f = math_exp if exp_typ == 'math' else prog_exp
    entries = session.query(VerifyData.provedflag)\
        .filter(VerifyData.lvl == lvl)\
        .filter(func.json_extract(VerifyData.config, '$.mode') == 'combined')\
        .filter(func.json_extract(VerifyData.config, '$.tag') == tag)\
        .order_by(VerifyData.time.desc())\
        .all()
    if len(entries) == 0:
        # No combined verification run recorded; that is only consistent if
        # no worker at or below this experience level played the level.
        workers = workers_played(lvl, experiment)
        workers = [
            w for w in workers if exp_f(w) is not None and exp_f(w) <= exp
        ]
        assert len(workers) == 0, "{}, {}, {}".format(lvl, exp, workers)
        return False
    return entries[0][0] == 1
def get_latest_evaluate_by_doc_type_id(nlp_task_id, doc_type_id):
    """Return the successful EvaluateTask with the highest f1 for a doc type.

    NOTE(review): despite the name, ordering is by the f1 score extracted
    from the JSON result (descending), not by time — confirm intent.
    """
    # Classification results store f1 under a different JSON path than the
    # other task types.
    evaluate_result_path = '$.result.overall."f1-score"' if nlp_task_id == int(NlpTaskEnum.classify) else '$.scores.f1_score.overall.f1'
    q = session.query(EvaluateTask)\
        .join(TrainTask, TrainTask.train_task_id == EvaluateTask.train_task_id)\
        .join(TrainJob, TrainJob.train_job_id == TrainTask.train_job_id)\
        .filter(
            EvaluateTask.evaluate_task_status == int(StatusEnum.success),
            TrainJob.doc_type_id == doc_type_id,
            ~EvaluateTask.is_deleted,
            ~TrainTask.is_deleted,
            ~TrainJob.is_deleted
        ).order_by(
            sa_func.json_extract(EvaluateTask.evaluate_task_result,
                                 evaluate_result_path).desc())
    return q.first()
def get_crawlingtask_status(gsheet_name: str, sheet_name: str, actionid: str):
    """Build a query for crawl task status tagged with the PIC value
    "<gsheet_name>_<sheet_name>" and the given action id.

    Selects the target URL (JSON key differs per action), the when_exists
    flag, status, and any error message stored in the JSON ext column.
    Returns the unexecuted SQLAlchemy query.
    """
    # ARTIST_ALBUM_IMAGE tasks store their target under "url"; other
    # actions store it under "youtube_url".
    if actionid == V4CrawlingTaskActionMaster.ARTIST_ALBUM_IMAGE:
        url = "url"
    else:
        url = "youtube_url"
    crawl_artist_image_status = (db_session.query(
        Crawlingtask.id,
        Crawlingtask.objectid,
        func.json_extract(Crawlingtask.taskdetail, f"$.{url}").label(f"{url}"),
        func.json_extract(Crawlingtask.taskdetail,
                          "$.when_exists").label("when_exists"),
        Crawlingtask.status,
        func.json_extract(Crawlingtask.ext, "$.message").label("message"))
        .select_from(Crawlingtask)
        .filter(
            func.json_extract(Crawlingtask.taskdetail,
                              "$.PIC") == f"{gsheet_name}_{sheet_name}",
            Crawlingtask.actionid == actionid)
        .order_by(Crawlingtask.objectid, Crawlingtask.created_at.desc())
    )
    return crawl_artist_image_status
def get_album_image_cant_crawl(artistuuid: list):
    """Build a query for today's image-crawl tasks of the given album/artist
    uuids, joined to the Album rows, newest task first per object.

    Returns the unexecuted SQLAlchemy query.
    """
    album_image_cant_crawl = (db_session.query(
        Album.title,
        Album.uuid,
        func.json_extract(Crawlingtask.taskdetail, "$.url").label("image_url"),
        Crawlingtask.status)
        .select_from(Crawlingtask)
        .join(Album, Album.uuid == Crawlingtask.objectid)
        .filter(func.DATE(Crawlingtask.created_at) == func.current_date(),
                Crawlingtask.actionid == 'OA9CPKSUT6PBGI1ZHPLQUPQCGVYQ71S9',
                Crawlingtask.objectid.in_(artistuuid))
        .order_by(Crawlingtask.objectid, Crawlingtask.created_at.desc())
    )
    return album_image_cant_crawl
def allInvs(session, enames=None, lvls=None, lvlsets=None, workers=None,
            assignments=None, enameSet=None, lvlSet=None, lvlsetSet=None,
            workerSet=None, assignmentSet=None, colSwaps=None):
    """Collect the set of (raw, canonical) invariant pairs from
    FoundInvariant events.

    The first five arguments are optional filters (forwarded to
    filterEvents); the *Set arguments, when provided, accumulate the
    distinct experiments/levels/lvlsets/workers/assignments seen, and
    ``colSwaps`` (a dict) counts occurrences per colSwap value.
    """
    q = session.query(
        Event.experiment,
        func.json_extract(Event.payload, "$.lvlid"),
        func.json_extract(Event.payload, "$.lvlset"),
        func.json_extract(Event.payload, "$.workerId"),
        func.json_extract(Event.payload, "$.assignmentId"),
        func.json_extract(Event.payload, "$.raw"),
        func.json_extract(Event.payload, "$.canonical"),
        func.ifnull(func.json_extract(Event.payload, "$.colSwap"), 0)) \
        .filter(Event.type == "FoundInvariant")
    q = filterEvents(q, enames, lvls, lvlsets, workers, assignments)

    def gen():
        for row in q.all():
            if enameSet is not None:
                enameSet.add(row[0])
            if lvlSet is not None:
                lvlSet.add(row[1])
            if lvlsetSet is not None:
                lvlsetSet.add(row[2])
            if workerSet is not None:
                workerSet.add(row[3])
            if assignmentSet is not None:
                assignmentSet.add(row[4])
            if colSwaps is not None:
                try:
                    colSwaps[row[7]] += 1
                except KeyError:
                    colSwaps[row[7]] = 1
            yield (row[5], row[6])
    # BUG FIX: dict.iteritems() exists only on Python 2; .items() works on
    # both 2 and 3 and keeps the de-duplication semantics (building the
    # dict keeps the last canonical value per raw invariant).
    return set(dict(gen()).items())
def collect_from_youtube_query():
    """Build a query reporting user YouTube contributions (PointLog action
    'CY' since 2020-10-01, still invalid) together with the linked crawl
    task, album/track info, and any DataSource the crawl produced.

    Returns the unexecuted SQLAlchemy query, newest contribution first.
    """
    collect_from_youtube_query = (db_session.query(
        PointLog.created_at,
        PointLog.valid,
        PointLog.id,
        func.json_extract(PointLog.info, "$.email").label("email"),
        func.json_extract(PointLog.info,
                          "$.youtube_url").label("contribution_url"),
        Album_Track.track_id,
        Track.title,
        Track.artist.label("track_artist"),
        Album.title.label("album_title"),
        Album.valid.label("album_valid"),
        Track.valid.label("track_valid"),
        func.json_extract(PointLog.info,
                          "$.content_type").label("contribution_type"),
        func.json_extract(PointLog.info,
                          "$.comment").label("contribution_comment"),
        PointLog.crawler_status,
        func.json_extract(PointLog.ext, "$.crawler_id").label("crawler_id"),
        Crawlingtask.status,
        PointLog.action_type,
        func.json_extract(DataSource.info,
                          "$.source.title").label("youtube_title"),
        func.json_extract(DataSource.info,
                          "$.source.uploader").label("youtube_uploader"))
        .select_from(PointLog)
        # Raw-SQL joins through JSON ext columns (crawler id, then the
        # data source the crawler created).
        .outerjoin(Crawlingtask,
                   text("crawlingtasks.id = pointlogs.ext ->> '$.crawler_id'"))
        .outerjoin(Album_Track, (Album_Track.track_id == PointLog.target_id))
        .outerjoin(Album, (Album.uuid == Album_Track.album_uuid)
                   & (Album.valid == 1))
        .outerjoin(Track, (Track.id == Album_Track.track_id)
                   & (Track.id != "") & (Track.valid == 1))
        .outerjoin(DataSource,
                   text("datasources.id = crawlingtasks.ext ->> '$.data_source_id'"))
        .filter(PointLog.action_type == "CY",
                PointLog.created_at > '2020-10-01',
                PointLog.valid == 0)
        .group_by(PointLog.id)
        .order_by(PointLog.created_at.desc())
        # .all()
    )
    return collect_from_youtube_query
def __getitem__(self, index):
    """Map ``index`` to a JSON path and apply json_extract to self.expr.

    An int indexes an array, a tuple builds a nested path (ints as array
    indices, everything else as quoted object keys), and any other value
    is used as a single object key.
    """
    if isinstance(index, int):
        path = "$[%s]" % index
    elif isinstance(index, tuple):
        joined = "".join(
            "[%s]" % part if isinstance(part, int) else '."%s"' % part
            for part in index)
        path = "$" + joined
    else:
        path = '$."%s"' % index
    # json_extract does not appear to return JSON sub-elements,
    # which is weird.
    return func.json_extract(self.expr, path, type_=NullType)
def get_crawlingtask_download_video_youtube_status_from_df(gsheet_name: str, sheet_name: str):
    '''
    Build a query for YouTube video-download tasks tagged with the PIC
    value "<gsheet_name>_<sheet_name>". Returns the unexecuted SQLAlchemy
    query, newest task first per object.

    Sheet configuration reference:
    MP3_SHEET_NAME = {"sheet_name": "MP_3",
                      "fomatid": DataSourceFormatMaster.FORMAT_ID_MP3_FULL,
                      "column_name": ["track_id", "Memo", "Mp3_link", "url_to_add"]}
    MP4_SHEET_NAME = {"sheet_name": "MP_4",
                      "fomatid": DataSourceFormatMaster.FORMAT_ID_MP4_FULL,
                      "column_name": ["track_id", "Memo", "MP4_link", "url_to_add"]}
    '''
    crawl_artist_image_status = (db_session.query(
        Crawlingtask.id,
        Crawlingtask.actionid,
        Crawlingtask.objectid,
        Crawlingtask.taskdetail,
        Crawlingtask.status,
        Crawlingtask.ext)
        .select_from(Crawlingtask)
        .filter(
            func.json_extract(Crawlingtask.taskdetail,
                              "$.PIC") == f"{gsheet_name}_{sheet_name}",
            Crawlingtask.actionid == V4CrawlingTaskActionMaster.DOWNLOAD_VIDEO_YOUTUBE)
        .order_by(Crawlingtask.objectid, Crawlingtask.created_at.desc())
    )
    return crawl_artist_image_status