def app(request):
    request.max_content_length = MAX_CONTENT_LENGTH
    auth = request.authorization
    if not auth:
        return login_required()
    match = location_re.match(request.path)
    if not match:
        return NotFound()
    groups = match.groups()
    if not groups:
        return NotFound()
    db, schema, func_name, path = groups
    path = '' if path is None else path
    username = auth.username
    password = auth.password
    try:
        environ = Json({k: v for k, v in request.environ.items() if k.isupper()})
        # materialize the filter object so Json can serialize it
        path = Json(list(filter(None, path.split('/'))))
        args = Json(dict(request.args))
        data = Json(loads(request.get_data() or 'null'))
        db = connect(database=db, user=username, password=password)
    except Exception:
        logger.exception(request.path)
        return InternalServerError()
    with db.cursor() as cur:
        try:
            cur.callproc(
                'http.process_request',
                (request.method, schema, func_name, environ, path, args, data))
            result = cur.fetchone()[0]
            response = Response(dumps(result), mimetype='application/json')
            db.commit()
            return response
        except psycopg2.Error as e:
            db.rollback()
            logger.exception(request.path)
            if e.diag.message_primary.isdigit():
                code = int(e.diag.message_primary)
                return default_exceptions.get(code, InternalServerError)()
            else:
                return InternalServerError(e.diag.message_primary)
        except Exception:
            db.rollback()
            logger.exception(request.path)
            return InternalServerError()
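# Hedged usage sketch, not part of the original snippet: the handler above
# uses Werkzeug-style Request/Response objects, so (assuming that is the
# case) Request.application can turn it into a WSGI app for the dev server.
from werkzeug.serving import run_simple
from werkzeug.wrappers import Request

if __name__ == "__main__":
    run_simple("127.0.0.1", 8080, Request.application(app))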
def add_user_privileges(db, userPrivileges, userId):
    db.call_procedure("AddUserPrivileges", [Json(userPrivileges), userId])
    return {"privileges": userPrivileges, "user_id": userId}
def create_multiprod_range_entry(dc, product, crses):
    conn = get_sqlconn(dc)
    txn = conn.begin()
    prodids = [p.id for p in product.products]
    wms_name = product.name

    # Attempt to insert row
    conn.execute("""
        INSERT INTO wms.multiproduct_ranges
        (wms_product_name, lat_min, lat_max, lon_min, lon_max, dates, bboxes)
        VALUES
        (%(p_id)s, 0, 0, 0, 0, %(empty)s, %(empty)s)
        ON CONFLICT (wms_product_name) DO NOTHING
        """,
        {"p_id": wms_name, "empty": Json("")})

    # Update extents
    conn.execute("""
        UPDATE wms.multiproduct_ranges
        SET (lat_min, lat_max, lon_min, lon_max) =
            (wms_get_min(%(p_prodids)s, 'lat'), wms_get_max(%(p_prodids)s, 'lat'),
             wms_get_min(%(p_prodids)s, 'lon'), wms_get_max(%(p_prodids)s, 'lon'))
        WHERE wms_product_name=%(p_id)s
        """,
        {"p_id": wms_name, "p_prodids": prodids})

    # Create sorted list of dates
    conn.execute("""
        WITH sorted AS (
            SELECT to_jsonb(array_agg(dates.d)) AS dates
            FROM (SELECT DISTINCT to_date(metadata::json->'extent'->>'center_dt', 'YYYY-MM-DD') AS d
                  FROM agdc.dataset
                  WHERE dataset_type_ref = any (%(p_prodids)s)
                  AND archived IS NULL
                  ORDER BY d) dates)
        UPDATE wms.multiproduct_ranges
        SET dates=sorted.dates
        FROM sorted
        WHERE wms_product_name=%(p_id)s
        """,
        {"p_id": wms_name, "p_prodids": prodids})

    # Calculate bounding boxes
    results = list(conn.execute("""
        SELECT lat_min, lat_max, lon_min, lon_max
        FROM wms.multiproduct_ranges
        WHERE wms_product_name=%(p_id)s
        """,
        {"p_id": wms_name}))
    r = results[0]
    epsg4326 = datacube.utils.geometry.CRS("EPSG:4326")
    box = datacube.utils.geometry.box(
        float(r[2]), float(r[0]),
        float(r[3]), float(r[1]),
        epsg4326)
    svc = get_service_cfg()
    # The whole mapping is wrapped in Json so psycopg2 can adapt it as a
    # single jsonb parameter
    conn.execute("""
        UPDATE wms.multiproduct_ranges
        SET bboxes = %s::jsonb
        WHERE wms_product_name=%s
        """,
        Json({
            crsid: jsonise_bbox(box.to_crs(crs).boundingbox)
            for crsid, crs in get_crses(svc).items()
        }),
        wms_name)
    txn.commit()
    conn.close()
def _to_tuple(obj):
    """Serialize dataclass into SQL insertable tuple"""
    return tuple(
        Json(v) if isinstance(v, dict) else v
        for v in obj.__dict__.values())
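# Hypothetical usage sketch for _to_tuple (the Event dataclass, the "events"
# table, and the open psycopg2 connection conn are assumptions, not from the
# original snippet): the %s placeholders line up with the dataclass fields in
# declaration order, and dict-valued fields arrive pre-wrapped in Json.
from dataclasses import dataclass

@dataclass
class Event:
    name: str
    payload: dict

event = Event(name="signup", payload={"plan": "free"})
with conn.cursor() as cur:
    cur.execute("INSERT INTO events (name, payload) VALUES (%s, %s)",
                _to_tuple(event))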
def contains(self, other):
    clone = self.as_json(True)
    if isinstance(other, (list, dict)):
        return Expression(clone, JSONB_CONTAINS, Json(other))
    return Expression(clone, JSONB_EXISTS, other)
def contained_by(self, other):
    return Expression(cast_jsonb(self), JSONB_CONTAINED_BY, Json(other))
def concat(self, value):
    return super(JSONField, self).concat(Json(value))
def concat(self, rhs):
    if not isinstance(rhs, Node):
        rhs = Json(rhs)
    return Expression(self.as_json(True), OP.CONCAT, rhs)
retweet_filter = '-filter:retweets'
reply_filter = '-filter:replies'
atuser = '******'
tweetsPerQry = 100

searchQuery = atuser + ' AND ' + retweet_filter + ' AND ' + reply_filter
test_tweets = api.search(q=searchQuery, count=tweetsPerQry, tweet_mode='extended')

tweetframe = pd.DataFrame(columns=['tweet_id', 'tweet_json'],
                          index=range(len(test_tweets)))
for i, tweet in enumerate(test_tweets):
    # print(tweet.id)
    # .loc[row, col] assigns in place; chained .loc[i][col] may write to a copy
    tweetframe.loc[i, 'tweet_id'] = tweet.id
    tweetframe.loc[i, 'tweet_json'] = Json(tweet._json)

tablename = 'test_tweets'
cur = con.cursor()
tweetframe.to_sql(tablename, engine, if_exists='replace')
#if not engine.dialect.has_table(engine, tablename):
#    command = """
#        CREATE TABLE test_tweets(
#            tweet_id int,
#            tweet_json varchar(10000)
#            PRIMARY KEY (tweet_id)
#        )
#        """
#    cur.execute(command)
def convert_to_column(self, value, record, values=None, validate=True):
    val = self.convert_to_cache(value, record, validate=validate)
    return Json(val) if val else None
def _wrap_json(self, arguments: Dict[str, Any]):
    return {
        key: Json(value, dumps=self.json_dumps) if isinstance(value, dict) else value
        for key, value in arguments.items()
    }
def insert(items=None, commerce_listings=None, commerce_prices=None, scratch=False):
    if items is None and commerce_listings is None and commerce_prices is None:
        return
    if items is None:
        items = []
    if commerce_listings is None:
        commerce_listings = []
    if commerce_prices is None:
        commerce_prices = []
    con = None
    try:
        con = psycopg2.connect("dbname='market_manipulator' user='******'")
        cur = con.cursor()
        if scratch:
            for item in items:
                cur.execute(
                    "INSERT INTO scratch_items "
                    "(id, name, icon, type, rarity, level, vendor_value, flags, game_types, restrictions) "
                    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                    [item[u'id'], item[u'name'], item[u'icon'], item[u'type'],
                     item[u'rarity'], item[u'level'], item[u'vendor_value'],
                     item[u'flags'], item[u'game_types'], item[u'restrictions']])
            for commerce_listing in commerce_listings:
                for buy in range(len(commerce_listing[u'buys'])):
                    buy_listings = commerce_listing[u'buys'][buy][u'listings']
                    buy_unit_price = commerce_listing[u'buys'][buy][u'unit_price']
                    buy_quantity = commerce_listing[u'buys'][buy][u'quantity']
                    cur.execute(
                        "INSERT INTO scratch_commerce_listings_buy "
                        "(id, buy_listings, buy_unit_price, buy_quantity) "
                        "VALUES (%s, %s, %s, %s)",
                        [commerce_listing[u'id'], buy_listings,
                         buy_unit_price, buy_quantity])
                for sell in range(len(commerce_listing[u'sells'])):
                    sell_listings = commerce_listing[u'sells'][sell][u'listings']
                    sell_unit_price = commerce_listing[u'sells'][sell][u'unit_price']
                    sell_quantity = commerce_listing[u'sells'][sell][u'quantity']
                    cur.execute(
                        "INSERT INTO scratch_commerce_listings_sell "
                        "(id, sell_listings, sell_unit_price, sell_quantity) "
                        "VALUES (%s, %s, %s, %s)",
                        [commerce_listing[u'id'], sell_listings,
                         sell_unit_price, sell_quantity])
            for commerce_price in commerce_prices:
                cur.execute(
                    "INSERT INTO scratch_commerce_prices "
                    "(id, buy_quantity, buy_unit_price, sell_quantity, sell_unit_price) "
                    "VALUES (%s, %s, %s, %s, %s)",
                    [commerce_price[u'id'],
                     commerce_price[u'buys'][u'quantity'],
                     commerce_price[u'buys'][u'unit_price'],
                     commerce_price[u'sells'][u'quantity'],
                     commerce_price[u'sells'][u'unit_price']])
        else:
            for item in items:
                cur.execute("INSERT INTO items (id, data) VALUES (%s, %s)",
                            [item[u'id'], Json(item)])
            for commerce_listing in commerce_listings:
                cur.execute(
                    "INSERT INTO commerce_listings (id, data) VALUES (%s, %s)",
                    [commerce_listing[u'id'], Json(commerce_listing)])
            for commerce_price in commerce_prices:
                cur.execute(
                    "INSERT INTO commerce_prices (id, data) VALUES (%s, %s)",
                    [commerce_price[u'id'], Json(commerce_price)])
        con.commit()
    except psycopg2.DatabaseError as e:
        if con:
            con.rollback()
        print('Database Error: %s' % e)
        sys.exit(1)
parser.add_argument('--host', type=str, help='db host', default="127.0.0.1")
parser.add_argument('--dryrun', action="store_true")
args = parser.parse_args()

conn = psycopg2.connect(dbname=args.dbname, user=args.user,
                        password=args.password, host=args.host)

SELECT_SQL = "SELECT created, updated, id, json, version_id FROM records_metadata"
UPDATE_SQL = "UPDATE records_metadata SET json = %s WHERE id = %s AND version_id = %s"

with conn.cursor() as cur:
    with conn.cursor() as cur2:
        cur.execute(SELECT_SQL)
        for rec in cur:
            md = rec[3]
            if md is not None:
                b2rec_value = get_b2rec_value(md['_pid'])
                if not pid_has_doi(md['_pid']) and b2rec_value:
                    doi_value = {
                        'type': 'DOI',
                        'value': '10.23728/b2share.{0}'.format(b2rec_value)
                    }
                    md['_pid'].append(doi_value)
                    pprint(doi_value)
                    pprint(md)
                    if not args.dryrun:
                        cur2.execute(UPDATE_SQL, (Json(md), rec[2], rec[4]))

if not args.dryrun:
    conn.commit()
input_script = tx[counter:counter + scriptLen]
counter += scriptLen
# input_sequence is the separator 'FF FF FF FF' for inputs and outputs
input_sequence = tx[counter:counter + 8]
counter += 8
input_details = {
    "in_num": input_num,
    "in_tx": input_tx,
    "in_index": input_index,
    "in_script": input_script,
    "address": "",
    "in_val": 0
}
trans["inputs"].append(input_details)
trans["JSONinputs"].append(Json(input_details))
# print("input:num,tx,index,script_len,sequence", input_num, input_tx, input_index, input_sequence)

# ----------------------------- OUTPUT PARSING -----------------------------
# Get the output count
tx_outCount = tx[counter:counter + 2]
# print(tx_outCount)
if tx_outCount != "fd":  # fewer than 253 outputs: the count fits in one byte
    outCount = int(tx_outCount, 16)
    counter += 2
elif tx_outCount == "fd" and len(
        access.getrawtransaction(bInfo["tx"][r], 1)["vout"]) >= 255:
    # "fd" prefix: the next two bytes (little-endian) hold the count
    outCount = getLitEndian(tx[counter + 2:counter + 6])
    counter += 6
else:  # the actual hex byte == "fd" --> exactly 253 outputs
def run(pg_txn):
    val = Json(txn._marshal())
    pg_txn.execute("SELECT pgetcd.submit(%s,%s)", (val, 10))
    rows = pg_txn.fetchall()
    res = "{0}".format(rows[0][0])
    return res
def concat(self, value):
    if not isinstance(value, Node):
        value = Json(value)
    return super(JSONField, self).concat(value)
def upsert_summary(
    msm,
    scores,
    anomaly: bool,
    confirmed: bool,
    msm_failure: bool,
    measurement_uid: str,
    software_name: str,
    software_version: str,
    platform: str,
    update: bool,
) -> None:
    """Insert a row in the fastpath_scores table. Overwrite an existing one."""
    sql_base_tpl = dedent("""\
        INSERT INTO fastpath (measurement_uid, report_id, domain, input, probe_cc,
            probe_asn, test_name, test_start_time, measurement_start_time,
            platform, software_name, software_version, scores,
            anomaly, confirmed, msm_failure)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT ON CONSTRAINT fastpath_pkey DO
    """)
    sql_update = dedent("""\
        UPDATE SET
            report_id = excluded.report_id,
            domain = excluded.domain,
            input = excluded.input,
            probe_cc = excluded.probe_cc,
            probe_asn = excluded.probe_asn,
            test_name = excluded.test_name,
            test_start_time = excluded.test_start_time,
            measurement_start_time = excluded.measurement_start_time,
            platform = excluded.platform,
            software_name = excluded.software_name,
            software_version = excluded.software_version,
            scores = excluded.scores,
            anomaly = excluded.anomaly,
            confirmed = excluded.confirmed,
            msm_failure = excluded.msm_failure
    """)
    sql_noupdate = " NOTHING"
    tpl = sql_base_tpl + (sql_update if update else sql_noupdate)

    # TODO: remove msmt parsing from upsert_summary
    asn = int(msm["probe_asn"][2:])  # e.g. "AS123" -> 123
    test_name = msm.get("test_name", None)
    input_ = msm.get("input", None)
    if test_name == "meek_fronted_requests_test" and isinstance(input_, list):
        domain = None if input_ is None else urlparse(input_[0]).netloc
        input_ = ":".join(input_)
    else:
        domain = None if input_ is None else urlparse(input_).netloc

    args = (
        measurement_uid,
        msm["report_id"],
        domain,
        input_,
        msm["probe_cc"],
        asn,
        test_name,
        msm["test_start_time"],
        msm["measurement_start_time"],
        platform,
        software_name,
        software_version,
        Json(scores, dumps=ujson.dumps),
        anomaly,
        confirmed,
        msm_failure,
    )

    # Send notification using pg_notify
    # TODO: do not send notifications during manual run or in devel mode
    notif_cols = (
        "report_id",
        "input",
        "probe_cc",
        "probe_asn",
        "test_name",
        "test_start_time",
        "measurement_start_time",
    )
    assert _autocommit_conn
    with _autocommit_conn.cursor() as cur:
        try:
            cur.execute(tpl, args)
        except psycopg2.ProgrammingError:
            log.error("upsert syntax error in %r", tpl, exc_info=True)
            return
        if cur.rowcount == 0:
            if update:
                log.error("Failed to upsert")
            else:
                metrics.incr("measurement_noupsert_count")
                log.info("measurement tid/uid collision")
            return
        else:
            metrics.incr("measurement_upsert_count")
def db_value(self, value):
    if value is None:
        return value
    if not isinstance(value, Json):
        return Json(value, dumps=self.dumps)
    return value
def seed_database(conn, schema="public"):
    with conn.cursor() as cur:
        cur.execute(
            sql.SQL("SET search_path TO {};").format(sql.Identifier(schema)))

        print("Writing metadata maps...", end="", flush=True)
        # Load metadata map
        with open(data_path / "metadata_map.json", "r") as fp:
            metadata_map = json.loads(fp.read())

        # Make a table for each metadata field
        for field in config["metadata_cols"].keys():
            table_name = "metadata_{}".format(field)
            cur.execute('DROP TABLE IF EXISTS "{table_name}";'.format(
                table_name=table_name))
            cur.execute("""
                CREATE TABLE "{table_name}" (
                    id INTEGER PRIMARY KEY,
                    value TEXT NOT NULL
                );
                """.format(table_name=table_name))
            metadata_df = pd.DataFrame.from_records(
                list(metadata_map[field].items()),
                columns=["id", "value"]).set_index("id")
            df_to_sql(cur, metadata_df, table_name, index_label="id")
            cur.execute("""
                CREATE INDEX "ix_{table_name}_id" ON "{table_name}"("id");
                """.format(table_name=table_name))
        print("done")

        print("Writing SNV maps...", end="", flush=True)
        # DNA SNVs
        dna_snp = process_dna_snvs(metadata_map["dna_snp"])
        cur.execute('DROP TABLE IF EXISTS "dna_snp";')
        cur.execute("""
            CREATE TABLE "dna_snp" (
                id INTEGER PRIMARY KEY,
                snp_str TEXT NOT NULL,
                pos INTEGER NOT NULL,
                ref TEXT NOT NULL,
                alt TEXT NOT NULL,
                color TEXT NOT NULL,
                snv_name TEXT NOT NULL
            );
            """)
        df_to_sql(cur, dna_snp, "dna_snp", index_label="id")
        cur.execute('CREATE INDEX "ix_dna_snp_pos" ON "dna_snp"("pos");')

        # AA SNVs
        gene_aa_snp = process_aa_snvs(metadata_map["gene_aa_snp"], "gene", genes)
        cur.execute('DROP TABLE IF EXISTS "gene_aa_snp";')
        cur.execute("""
            CREATE TABLE "gene_aa_snp" (
                id INTEGER PRIMARY KEY,
                snp_str TEXT NOT NULL,
                gene TEXT NOT NULL,
                pos INTEGER NOT NULL,
                ref TEXT NOT NULL,
                alt TEXT NOT NULL,
                color TEXT NOT NULL,
                snv_name TEXT NOT NULL,
                nt_pos INTEGER NOT NULL
            );
            """)
        df_to_sql(cur, gene_aa_snp, "gene_aa_snp", index_label="id")
        cur.execute('CREATE INDEX "ix_gene_aa_snp_pos" ON "gene_aa_snp"("pos");')
        cur.execute('CREATE INDEX "ix_gene_aa_snp_nt_pos" ON "gene_aa_snp"("nt_pos");')
        cur.execute('CREATE INDEX "ix_gene_aa_snp_gene" ON "gene_aa_snp"("gene");')

        protein_aa_snp = process_aa_snvs(
            metadata_map["protein_aa_snp"], "protein", proteins)
        cur.execute('DROP TABLE IF EXISTS "protein_aa_snp";')
        cur.execute("""
            CREATE TABLE "protein_aa_snp" (
                id INTEGER PRIMARY KEY,
                snp_str TEXT NOT NULL,
                protein TEXT NOT NULL,
                pos INTEGER NOT NULL,
                ref TEXT NOT NULL,
                alt TEXT NOT NULL,
                color TEXT NOT NULL,
                snv_name TEXT NOT NULL,
                nt_pos INTEGER NOT NULL
            );
            """)
        df_to_sql(cur, protein_aa_snp, "protein_aa_snp", index_label="id")
        cur.execute('CREATE INDEX "ix_protein_aa_snp_pos" ON "protein_aa_snp"("pos");')
        cur.execute('CREATE INDEX "ix_protein_aa_snp_nt_pos" ON "protein_aa_snp"("nt_pos");')
        cur.execute('CREATE INDEX "ix_protein_aa_snp_protein" ON "protein_aa_snp"("protein");')
        print("done")

        print("Writing location map...", end="", flush=True)
        # Locations
        location_map = pd.read_json(data_path / "location_map.json")
        cur.execute('DROP TABLE IF EXISTS "location";')
        cur.execute("""
            CREATE TABLE "location" (
                id INTEGER PRIMARY KEY,
                region TEXT NOT NULL,
                country TEXT NOT NULL,
                division TEXT NOT NULL,
                location TEXT NOT NULL
            );
            """)
        df_to_sql(cur, location_map, "location", index_label="id")
        print("done")

        print("Writing groups and consensus SNVs...", end="", flush=True)
        # Consensus SNVs
        with (data_path / "group_consensus_snps.json").open("r") as fp:
            group_consensus_snps = json.loads(fp.read())

        snp_fields = ["dna", "gene_aa", "protein_aa"]
        for grouping in group_consensus_snps.keys():
            for snp_field in snp_fields:
                table_name = "{grouping}_consensus_{snp_field}_snp".format(
                    grouping=grouping, snp_field=snp_field)
                # Create tables
                cur.execute('DROP TABLE IF EXISTS "{table_name}";'.format(
                    table_name=table_name))
                cur.execute("""
                    CREATE TABLE "{table_name}" (
                        name TEXT NOT NULL,
                        snp_id INTEGER NOT NULL
                    );
                    """.format(table_name=table_name))
                # Collect tuples of (group, snp_id)
                group_snps = []
                for group in group_consensus_snps[grouping].keys():
                    for snp_id in group_consensus_snps[grouping][group][
                            snp_field + "_snp_ids"]:
                        group_snps.append((group, snp_id))
                group_snp_df = pd.DataFrame.from_records(
                    group_snps, columns=["name", "snp_id"]).set_index("name")
                df_to_sql(cur, group_snp_df, table_name, index_label="name")
                cur.execute("""
                    CREATE INDEX "idx_{table_name}_name" ON "{table_name}"("name");
                    """.format(table_name=table_name))

        # Grouping tables
        # Build colormaps
        for grouping in group_consensus_snps.keys():
            group_df = pd.DataFrame(
                {"name": list(group_consensus_snps[grouping].keys())})
            group_df["color"] = group_df["name"].map(
                get_categorical_colormap(group_df["name"]))
            cur.execute('DROP TABLE IF EXISTS "{grouping}";'.format(
                grouping=grouping))
            cur.execute("""
                CREATE TABLE "{grouping}" (
                    id INTEGER PRIMARY KEY,
                    name TEXT NOT NULL,
                    color TEXT NOT NULL
                );
                """.format(grouping=grouping))
            df_to_sql(cur, group_df, grouping, index_label="id")
            cur.execute("""
                CREATE INDEX "idx_{grouping}_name" ON "{grouping}"("name");
                """.format(grouping=grouping))
        print("done")

        print("Writing sequence metadata...", end="", flush=True)
        # Sequence metadata
        case_data = pd.read_json(data_path / "case_data.json")
        # case_data = case_data.set_index("Accession ID")
        case_data["collection_date"] = pd.to_datetime(case_data["collection_date"])
        case_data["submission_date"] = pd.to_datetime(case_data["submission_date"])
        # print(case_data.columns)

        # Make a column for each metadata field
        metadata_cols = []
        metadata_col_defs = ""
        for field in config["metadata_cols"].keys():
            metadata_col_defs += "{field} INTEGER NOT NULL,\n".format(field=field)
            metadata_cols.append(field)

        # Make a column for each grouping
        grouping_cols = []
        grouping_col_defs = ""
        for grouping in group_consensus_snps.keys():
            grouping_col_defs += "{grouping} TEXT NOT NULL,\n".format(
                grouping=grouping)
            grouping_cols.append(grouping)

        cur.execute('DROP TABLE IF EXISTS "metadata";')
        cur.execute("""
            CREATE TABLE "metadata" (
                id INTEGER PRIMARY KEY,
                "Accession ID" TEXT NOT NULL,
                collection_date TIMESTAMP NOT NULL,
                submission_date TIMESTAMP NOT NULL,
                {metadata_col_defs}
                {grouping_col_defs}
                location_id INTEGER NOT NULL
            );
            """.format(metadata_col_defs=metadata_col_defs,
                       grouping_col_defs=grouping_col_defs))
        df_to_sql(
            cur,
            case_data[["Accession ID", "collection_date", "submission_date"]
                      + metadata_cols + grouping_cols + ["location_id"]],
            "metadata",
            index_label="id",
        )

        # Create indices
        cur.execute('CREATE INDEX "ix_metadata_collection_date" ON "metadata"("collection_date");')
        cur.execute('CREATE INDEX "ix_metadata_submission_date" ON "metadata"("submission_date");')
        cur.execute('CREATE INDEX "ix_metadata_location_id" ON "metadata"("location_id");')
        for field in config["metadata_cols"].keys():
            cur.execute(
                'CREATE INDEX "ix_metadata_{field}" ON "metadata"("{field}");'.format(
                    field=field))
        print("done")

        print("Writing sequence SNVs...", end="", flush=True)
        # Sequence SNV data
        snp_fields = ["dna", "gene_aa", "protein_aa"]
        for snp_field in snp_fields:
            snp_col = snp_field + "_snp_str"
            table_name = "sequence_{snp_field}_snp".format(snp_field=snp_field)
            cur.execute('DROP TABLE IF EXISTS "{table_name}";'.format(
                table_name=table_name))
            cur.execute("""
                CREATE TABLE "{table_name}" (
                    sequence_id INTEGER NOT NULL,
                    snp_id INTEGER NOT NULL
                );
                """.format(table_name=table_name))
            df_to_sql(
                cur,
                case_data[[snp_col]].explode(snp_col),
                table_name,
                index_label="sequence_id",
            )
            cur.execute(
                'CREATE INDEX "ix_{table_name}_sequence_id" ON "{table_name}"("sequence_id");'
                .format(table_name=table_name))
            cur.execute(
                'CREATE INDEX "ix_{table_name}_snp_id" ON "{table_name}"("snp_id");'
                .format(table_name=table_name))
        print("done")

        print("Writing JSONs...", end="", flush=True)
        # Stats table
        cur.execute('DROP TABLE IF EXISTS "stats";')
        cur.execute("""
            CREATE TABLE "stats" (
                value JSON NOT NULL
            );
            """)
        stats = {
            "num_sequences": len(case_data),
            "data_date": datetime.date.today().isoformat(),
        }
        cur.execute(
            """
            INSERT INTO "stats" (value) VALUES (%s)
            """,
            [Json(stats)],
        )

        # Country score
        # Just dump this as a big JSON
        cur.execute('DROP TABLE IF EXISTS "country_score";')
        cur.execute("""
            CREATE TABLE "country_score" (
                value JSON NOT NULL
            );
            """)
        with (data_path / "country_score.json").open("r") as fp:
            country_score = json.loads(fp.read())
        cur.execute(
            """
            INSERT INTO "country_score" (value) VALUES (%s)
            """,
            [Json(country_score)],
        )

        # Geo select tree
        # Just dump this as a JSON string in a table
        cur.execute('DROP TABLE IF EXISTS "geo_select_tree";')
        cur.execute("""
            CREATE TABLE "geo_select_tree" (
                value JSON NOT NULL
            )
            """)
        with (data_path / "geo_select_tree.json").open("r") as fp:
            geo_select_tree = json.loads(fp.read())
        cur.execute(
            """
            INSERT INTO "geo_select_tree" (value) VALUES (%s)
            """,
            [Json(geo_select_tree)],
        )
        print("done")
def contains(self, other):
    if isinstance(other, (list, dict)):
        return Expression(self, JSONB_CONTAINS, Json(other))
    return Expression(cast_jsonb(self), JSONB_EXISTS, other)
def save_package_metadata(package_metadata):
    cur.execute("insert into package (id, metadata) values (%s, %s)",
                (package_metadata['name'], Json(package_metadata)))
def concat(self, rhs):
    return Expression(self.as_json(True), OP.CONCAT, Json(rhs))
def contains(self, other):
    if isinstance(other, (list, dict)):
        return Expression(self, OP.JSONB_CONTAINS, Json(other))
    return Expression(self, OP.JSONB_EXISTS, Passthrough(other))
def _to_dict(obj):
    """Serialize dataclass into SQL insertable dictionary"""
    return {
        k: Json(v) if isinstance(v, dict) else v
        for k, v in obj.__dict__.items()
    }
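# Hypothetical usage sketch for _to_dict (the Job dataclass, the "jobs" table,
# and the open psycopg2 connection conn are assumptions): named %(key)s
# placeholders are resolved against the returned dictionary, so dataclass
# field names must match the placeholder names.
from dataclasses import dataclass

@dataclass
class Job:
    id: int
    config: dict

with conn.cursor() as cur:
    cur.execute("INSERT INTO jobs (id, config) VALUES (%(id)s, %(config)s)",
                _to_dict(Job(id=7, config={"retries": 3})))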
def contained_by(self, other):
    return Expression(self, OP.JSONB_CONTAINED_BY, Json(other))
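# Hedged illustration of the contains()/contained_by() expressions above,
# assuming a hypothetical peewee model Doc with a binary JSON field "meta":
# contains() builds the jsonb @> operator, contained_by() builds <@.
docs_with_tag = Doc.select().where(Doc.meta.contains({"tags": ["new"]}))
subset_docs = Doc.select().where(Doc.meta.contained_by({"a": 1, "b": 2}))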
def rng_update(conn, rng, product, path=None):
    # pylint: disable=bad-continuation
    if isinstance(product, ProductLayerDef):
        if product.multi_product:
            assert path is None
            conn.execute("""
                UPDATE wms.multiproduct_ranges
                SET lat_min=%s, lat_max=%s, lon_min=%s, lon_max=%s,
                    dates=%s, bboxes=%s
                WHERE wms_product_name=%s
                """,
                rng["lat"]["min"], rng["lat"]["max"],
                rng["lon"]["min"], rng["lon"]["max"],
                Json([t.strftime("%Y-%m-%d") for t in rng["times"]]),
                Json({crsid: jsonise_bbox(bbox)
                      for crsid, bbox in rng["bboxes"].items()}),
                product.name)
            return
        product = product.product
    if path is not None:
        conn.execute("""
            UPDATE wms.sub_product_ranges
            SET lat_min=%s, lat_max=%s, lon_min=%s, lon_max=%s,
                dates=%s, bboxes=%s
            WHERE product_id=%s AND sub_product_id=%s
            """,
            rng["lat"]["min"], rng["lat"]["max"],
            rng["lon"]["min"], rng["lon"]["max"],
            Json([t.strftime("%Y-%m-%d") for t in rng["times"]]),
            Json({crsid: jsonise_bbox(bbox)
                  for crsid, bbox in rng["bboxes"].items()}),
            product.id, path)
    else:
        conn.execute("""
            UPDATE wms.product_ranges
            SET lat_min=%s, lat_max=%s, lon_min=%s, lon_max=%s,
                dates=%s, bboxes=%s
            WHERE id=%s
            """,
            rng["lat"]["min"], rng["lat"]["max"],
            rng["lon"]["min"], rng["lon"]["max"],
            Json([t.strftime("%Y-%m-%d") for t in rng["times"]]),
            Json({crsid: jsonise_bbox(bbox)
                  for crsid, bbox in rng["bboxes"].items()}),
            product.id)
def fit(df, fval, i, n):
    """
    Given a dataframe df with group-by query results, partition attributes
    f (outer scope), and values fval for the partition attributes, try to
    find local patterns for all aggregation functions and n (TODO explain n).
    Also keep track of the deviation of aggregate function results from the
    median for these values of the partition attributes.
    """
    nonlocal global_dev_pos, global_dev_neg, valid_l_f, valid_c_f
    if not self.config.fit:
        return
    self.stats.startTimer('regression')
    for agg in aggList:
        if agg not in sample_invalid[i] or 'c' not in sample_invalid[i][agg]:
            avg = mean(df[agg])
            describe = [
                avg,
                mode(df[agg]),
                percentile(df[agg], 25),
                percentile(df[agg], 50),
                percentile(df[agg], 75)
            ]
            dev_pos = max(df[agg]) - avg
            dev_neg = min(df[agg]) - avg
            # fitting constant
            theta_c = chisquare(df[agg].dropna())[1]
            self.stats.incr('patcand.local')
            if theta_c > self.config.theta_c:
                try:
                    valid_c_f[i][agg] += 1
                except KeyError:
                    valid_c_f[i][agg] = 1
                global_dev_pos[i][agg]['c'] = max(
                    global_dev_pos[i][agg]['c'], dev_pos)
                global_dev_neg[i][agg]['c'] = min(
                    global_dev_neg[i][agg]['c'], dev_neg)
                # self.pc.add_local(f,oldKey,v,a,agg,'const',theta_c)
                log.debug(
                    "local constant pattern holds: (f: %s, %s, %s, agg: %s, GOF: %s) - dev-:%f - dev+:%f",
                    f[i], fval, v[i], agg, theta_c,
                    global_dev_neg[i][agg]['c'], global_dev_pos[i][agg]['c'])
                pattern.append(
                    self.addLocal(f[i], fval, v[i], agg, 'const', theta_c,
                                  describe, 'NULL', dev_pos, dev_neg))
                self.stats.incr('patterns.local')
            # fitting linear
            if agg not in sample_invalid[i] or 'l' not in sample_invalid[i][agg]:
                if theta_c != 1 and (
                        (self.config.reg_package == 'sklearn'
                         and all(attr in self.num for attr in v[i]))
                        or (self.config.reg_package == 'statsmodels'
                            and all(attr in self.num for attr in v[i]))):
                    if self.config.reg_package == 'sklearn':
                        lr = LinearRegression()
                        lr.fit(df[v[i]], df[agg])
                        theta_l = lr.score(df[v[i]], df[agg])
                        # adjusted R^2
                        theta_l = 1 - (1 - theta_l) * (n - 1) / (n - len(v[i]) - 1)
                        param = lr.coef_.tolist()
                        param.append(lr.intercept_.tolist())
                        param = "'" + str(param) + "'"
                    else:  # statsmodels
                        if n <= len(v[i]) + 1:
                            # negative R^2 for sure
                            return
                        lr = sm.ols(
                            agg + '~' + '+'.join([
                                attr if attr in self.num else 'C(' + attr + ')'
                                for attr in v[i]
                            ]),
                            data=df,
                            missing='drop').fit()
                        theta_l = lr.rsquared_adj
                        param = Json(dict(lr.params))
                        dev_pos = max(lr.resid)
                        dev_neg = min(lr.resid)
                    self.stats.incr('patcand.local')
                    if theta_l and theta_l > self.config.theta_l:
                        try:
                            valid_l_f[i][agg] += 1
                        except KeyError:
                            valid_l_f[i][agg] = 1
                        global_dev_pos[i][agg]['l'] = max(
                            global_dev_pos[i][agg]['l'], dev_pos)
                        global_dev_neg[i][agg]['l'] = min(
                            global_dev_neg[i][agg]['l'], dev_neg)
                        log.debug(
                            "local linear pattern holds: (f: %s, %s, %s, agg: %s, GOF: %s) dev-: %f, dev+: %f",
                            f[i], fval, v[i], agg, theta_l,
                            global_dev_neg[i][agg]['l'], global_dev_pos[i][agg]['l'])
                        pattern.append(
                            self.addLocal(f[i], fval, v[i], agg, 'linear', theta_l,
                                          describe, param, dev_pos, dev_neg))
                        self.stats.incr('patterns.local')
    self.stats.stopTimer('regression')
def create_range_entry(dc, product, crses):
    conn = get_sqlconn(dc)
    txn = conn.begin()
    prodid = product.id

    # Attempt to insert row
    conn.execute("""
        INSERT INTO wms.product_ranges
        (id, lat_min, lat_max, lon_min, lon_max, dates, bboxes)
        VALUES
        (%(p_id)s, 0, 0, 0, 0, %(empty)s, %(empty)s)
        ON CONFLICT (id) DO NOTHING
        """,
        {"p_id": prodid, "empty": Json("")})

    # Update extents (wms_get_min/wms_get_max take the product-id array)
    conn.execute("""
        UPDATE wms.product_ranges
        SET (lat_min, lat_max, lon_min, lon_max) =
            (wms_get_min(%(p_idarr)s, 'lat'), wms_get_max(%(p_idarr)s, 'lat'),
             wms_get_min(%(p_idarr)s, 'lon'), wms_get_max(%(p_idarr)s, 'lon'))
        WHERE id=%(p_id)s
        """,
        {"p_id": prodid, "p_idarr": [prodid]})

    # Create sorted list of dates
    conn.execute("""
        WITH sorted AS (
            SELECT to_jsonb(array_agg(dates.d)) AS dates
            FROM (SELECT DISTINCT to_date(metadata::json->'extent'->>'center_dt', 'YYYY-MM-DD') AS d
                  FROM agdc.dataset
                  WHERE dataset_type_ref=%(p_id)s
                  AND archived IS NULL
                  ORDER BY d) dates)
        UPDATE wms.product_ranges
        SET dates=sorted.dates
        FROM sorted
        WHERE id=%(p_id)s
        """,
        {"p_id": prodid})

    # Calculate bounding boxes
    results = list(conn.execute("""
        SELECT lat_min, lat_max, lon_min, lon_max
        FROM wms.product_ranges
        WHERE id=%s
        """,
        prodid))
    r = results[0]
    epsg4326 = datacube.utils.geometry.CRS("EPSG:4326")
    box = datacube.utils.geometry.box(
        float(r[2]), float(r[0]),
        float(r[3]), float(r[1]),
        epsg4326)
    conn.execute("""
        UPDATE wms.product_ranges
        SET bboxes = %s::jsonb
        WHERE id=%s
        """,
        Json({
            crsid: {"top": box.to_crs(crs).boundingbox.top,
                    "bottom": box.to_crs(crs).boundingbox.bottom,
                    "left": box.to_crs(crs).boundingbox.left,
                    "right": box.to_crs(crs).boundingbox.right}
            for crsid, crs in crses.items()
        }),
        product.id)
    txn.commit()
    conn.close()
def fit(df, f, fval, v, n):
    nonlocal global_dev_pos, global_dev_neg, valid_c_f, valid_l_f
    if not self.config.fit:
        return
    log.debug("do regression for F=%s, f=%s", f, fval)
    self.stats.startTimer('regression')
    for agg in aggList:
        if agg not in sample_invalid or 'c' not in sample_invalid[agg]:
            avg = mean(df[agg])
            describe = [
                avg,
                mode(df[agg]),
                percentile(df[agg], 25),
                percentile(df[agg], 50),
                percentile(df[agg], 75)
            ]
            dev_pos = max(df[agg]) - avg
            dev_neg = min(df[agg]) - avg
            # fitting constant
            theta_c = chisquare(df[agg].dropna())[1]
            self.stats.incr('patcand.local')
            if theta_c > self.config.theta_c:
                try:
                    valid_c_f[agg] += 1
                except KeyError:
                    valid_c_f[agg] = 1
                global_dev_pos[agg]['c'] = max(global_dev_pos[agg]['c'], dev_pos)
                global_dev_neg[agg]['c'] = min(global_dev_neg[agg]['c'], dev_neg)
                pattern.append(
                    self.addLocal(f, fval, v, agg, 'const', theta_c,
                                  describe, 'NULL', dev_pos, dev_neg))
                self.stats.incr('patterns.local')
            # fitting linear
            if agg not in sample_invalid or 'l' not in sample_invalid[agg]:
                if theta_c != 1 and (
                        (self.config.reg_package == 'sklearn'
                         and all(attr in self.num for attr in v))
                        or (self.config.reg_package == 'statsmodels'
                            and all(attr in self.num for attr in v))):
                    if self.config.reg_package == 'sklearn':
                        lr = LinearRegression()
                        lr.fit(df[v], df[agg])
                        theta_l = lr.score(df[v], df[agg])
                        # adjusted R^2
                        theta_l = 1 - (1 - theta_l) * (n - 1) / (n - len(v) - 1)
                        param = lr.coef_.tolist()
                        param.append(lr.intercept_.tolist())
                        param = "'" + str(param) + "'"
                    else:  # statsmodels
                        if n <= len(v) + 1:
                            # negative R^2 for sure
                            return
                        lr = sm.ols(
                            agg + '~' + '+'.join([
                                attr if attr in self.num else 'C(' + attr + ')'
                                for attr in v
                            ]),
                            data=df,
                            missing='drop').fit()
                        # theta_l = lr.rsquared_adj
                        theta_l = chisquare(df[agg], lr.predict())[1]
                        param = Json(dict(lr.params))
                        dev_pos = max(lr.resid)
                        dev_neg = min(lr.resid)
                    self.stats.incr('patcand.local')
                    if theta_l and theta_l > self.config.theta_l:
                        try:
                            valid_l_f[agg] += 1
                        except KeyError:
                            valid_l_f[agg] = 1
                        global_dev_pos[agg]['l'] = max(global_dev_pos[agg]['l'], dev_pos)
                        global_dev_neg[agg]['l'] = min(global_dev_neg[agg]['l'], dev_neg)
                        pattern.append(
                            self.addLocal(f, fval, v, agg, 'linear', theta_l,
                                          describe, param, dev_pos, dev_neg))
                        self.stats.incr('patterns.local')
    self.stats.stopTimer('regression')
def get_prep_value(self, value):
    if value is not None:
        return Json(value,
                    dumps=partial(json.dumps, cls=DateTimeAwareJSONEncoder))
    return value
def PGJson(data):
    return Json(data, dumps=json_dumps)
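# Minimal usage sketch for PGJson (assumptions: json_dumps is a custom
# serializer such as functools.partial(json.dumps, default=str), conn is an
# open psycopg2 connection, and "events" has a json/jsonb column). Json
# defers serialization to the json_dumps callable at execute time, so
# non-standard types like datetimes can be handled there.
import datetime

with conn.cursor() as cur:
    cur.execute("INSERT INTO events (payload) VALUES (%s)",
                (PGJson({"at": datetime.datetime.utcnow(), "ok": True}),))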