def update(dataset_id, dictionary, author_id):
    # TODO(roman): Make author_id argument optional (keep old author if None).
    jsonschema.validate(dictionary, BASE_JSON_SCHEMA)

    with create_cursor() as cursor:
        if "description" not in dictionary:
            dictionary["description"] = None

        cursor.execute("""UPDATE dataset
                          SET (name, description, public, author) = (%s, %s, %s, %s)
                          WHERE id = %s""",
                       (dictionary["name"], dictionary["description"], dictionary["public"],
                        author_id, dataset_id))

        # Replacing old classes with new ones
        cursor.execute("""DELETE FROM dataset_class WHERE dataset = %s""", (dataset_id,))

        for cls in dictionary["classes"]:
            if "description" not in cls:
                cls["description"] = None
            cursor.execute("""INSERT INTO dataset_class (name, description, dataset)
                              VALUES (%s, %s, %s) RETURNING id""",
                           (cls["name"], cls["description"], dataset_id))
            cls_id = cursor.fetchone()[0]

            for recording_mbid in cls["recordings"]:
                cursor.execute("INSERT INTO dataset_class_member (class, mbid) VALUES (%s, %s)",
                               (cls_id, recording_mbid))

    # If anything bad happens above, it should just rollback by default.
    commit()
def drop_tables(self):
    with create_cursor() as cursor:
        # TODO(roman): See if there's a better way to drop all tables.
        # FIXME: Need to drop all tables that we have there.
        # cursor.execute('DROP TABLE IF EXISTS i_am_a_table CASCADE;')
        pass
    commit()
def get_brewery():
    b = True
    if 'brewery' in request.args:
        bry = '.*' + request.args['brewery'] + '.*'
    elif 'sid' in request.args:
        bry = request.args['sid']
        b = False
    else:
        return "No brewery provided. You fool."
    conn = db.establish_connection()
    cur = db.create_cursor(conn)
    out = ""
    try:
        if b:
            cur.execute('SELECT * FROM Breweries WHERE name ~* %s', (bry, ))
        else:
            cur.execute('SELECT * FROM Breweries WHERE StateID=%s', (bry, ))
        for brew_data in cur:
            out += "<h1>Here's a brewery</h1><p>" + str(
                brew_data) + "</p><a href='/beer?id=" + str(
                    brew_data[4]) + "'>BEERS</a>"
        # cur.execute('SELECT * FROM Beers WHERE BreweryId=%s', (brew_data[4],))
        # for item in cur:
        #     out += "<p>" + str(item) + "</p>"
    except Exception as e:
        out = "<h1>Here's an error</h1><p>" + str(e) + "</p>"
    finally:
        # conn.commit()
        db.close_connection(conn, cur)
    return out
def get(id):
    """Get user with a specified ID (integer)."""
    with create_cursor() as cursor:
        cursor.execute('SELECT id, created, musicbrainz_id FROM "user" WHERE id = %s', (id,))
        row = cursor.fetchone()
    return dict(row) if row else None
def import_db_dump(archive_path):
    """Import data from .tar.xz archive into the database."""
    pxz_command = ["pxz", "--decompress", "--stdout", archive_path]
    pxz = subprocess.Popen(pxz_command, stdout=subprocess.PIPE)

    table_names = _TABLES.keys()

    with create_cursor() as cursor:
        with tarfile.open(fileobj=pxz.stdout, mode="r|") as tar:
            for member in tar:
                file_name = member.name.split("/")[-1]

                if file_name == "SCHEMA_SEQUENCE":
                    # Verifying schema version
                    schema_seq = int(tar.extractfile(member).read().strip())
                    if schema_seq != db.SCHEMA_VERSION:
                        raise Exception("Incorrect schema version! Expected: %d, got: %d. "
                                        "Please get the latest version of the dump."
                                        % (db.SCHEMA_VERSION, schema_seq))
                    else:
                        logging.info("Schema version verified.")

                else:
                    if file_name in table_names:
                        logging.info(" - Importing data into %s table..." % file_name)
                        cursor.copy_from(tar.extractfile(member), '"%s"' % file_name,
                                         columns=_TABLES[file_name])
    commit()

    pxz.stdout.close()
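# Usage sketch (illustrative; the archive path below is hypothetical). Requires the
# `pxz` binary on PATH and a dump whose SCHEMA_SEQUENCE matches db.SCHEMA_VERSION:
#
#   import_db_dump("/path/to/acousticbrainz-dump.tar.xz")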
def create_from_dict(dictionary, author_id):
    """Creates a new dataset from a dictionary.

    Returns:
        ID of the newly created dataset.
    """
    jsonschema.validate(dictionary, BASE_JSON_SCHEMA)

    with create_cursor() as cursor:
        if "description" not in dictionary:
            dictionary["description"] = None

        cursor.execute("""INSERT INTO dataset (id, name, description, public, author)
                          VALUES (uuid_generate_v4(), %s, %s, %s, %s) RETURNING id""",
                       (dictionary["name"], dictionary["description"], dictionary["public"],
                        author_id))
        dataset_id = cursor.fetchone()[0]

        for cls in dictionary["classes"]:
            if "description" not in cls:
                cls["description"] = None
            cursor.execute("""INSERT INTO dataset_class (name, description, dataset)
                              VALUES (%s, %s, %s) RETURNING id""",
                           (cls["name"], cls["description"], dataset_id))
            cls_id = cursor.fetchone()[0]

            for recording_mbid in cls["recordings"]:
                cursor.execute("INSERT INTO dataset_class_member (class, mbid) VALUES (%s, %s)",
                               (cls_id, recording_mbid))

    # If anything bad happens above, it should just rollback by default.
    commit()

    return dataset_id
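# Illustrative sketch of the dictionary shape create_from_dict() expects. This is
# inferred from how the function reads its argument; the authoritative definition
# is BASE_JSON_SCHEMA, and the MBID and author_id values below are made up.
#
#   dataset_id = create_from_dict({
#       "name": "My genre dataset",
#       "description": "A small hand-picked set",   # optional
#       "public": True,
#       "classes": [
#           {
#               "name": "rock",
#               "description": "Rock recordings",    # optional
#               "recordings": ["770cc467-8dde-4d22-bc4c-a42f91e7515e"],
#           },
#       ],
#   }, author_id=42)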
def _get_incremental_dump_timestamp(dump_id=None):
    with create_cursor() as cursor:
        if dump_id:
            cursor.execute("SELECT created FROM incremental_dumps WHERE id = %s", (dump_id,))
        else:
            cursor.execute("SELECT created FROM incremental_dumps ORDER BY id DESC")
        row = cursor.fetchone()
    return row[0] if row else None
def _create_new_inc_dump_record():
    """Creates new record for incremental dump and returns its ID and creation time."""
    with create_cursor() as cursor:
        cursor.execute("INSERT INTO incremental_dumps (created) VALUES (now()) RETURNING id, created")
        commit()
        row = cursor.fetchone()
    logging.info("Created new incremental dump record (ID: %s)." % row[0])
    return row
def count_lowlevel(mbid):
    """Count number of stored low-level submissions for a specified MBID."""
    with db.create_cursor() as cursor:
        cursor.execute(
            "SELECT count(*) FROM lowlevel WHERE mbid = %s",
            (str(mbid),)
        )
        return cursor.fetchone()[0]
def create(musicbrainz_id):
    with create_cursor() as cursor:
        # TODO(roman): Do we need to make sure that musicbrainz_id is case insensitive?
        cursor.execute('INSERT INTO "user" (musicbrainz_id) VALUES (%s) RETURNING id',
                       (musicbrainz_id,))
        commit()
        new_id = cursor.fetchone()[0]
    return new_id
def set_job_result(job_id, result):
    with create_cursor() as cursor:
        cursor.execute(
            "UPDATE dataset_eval_jobs "
            "SET (result, updated) = (%s, current_timestamp) "
            "WHERE id = %s",
            (result, job_id)
        )
    commit()
def _get_recordings_in_class(class_id):
    with create_cursor() as cursor:
        cursor.execute("SELECT mbid FROM dataset_class_member WHERE class = %s", (class_id,))
        rows = cursor.fetchall()
    recordings = []
    for row in rows:
        recordings.append(row["mbid"])
    return recordings
def api_get_beer():
    name = None
    abbr = None
    abv = None
    brew = None
    style = None
    results = 20
    vals = []
    if 'name' in request.args:
        name = ".*" + request.args['name'] + ".*"
        vals.append(name)
    if 'brew' in request.args:
        brew = ".*" + request.args['brew'] + ".*"
        vals.append(brew)
    if 'style' in request.args:
        style = ".*" + request.args['style'] + ".*"
        vals.append(style)
    if 'abv' in request.args:
        abv = request.args['abv']
        vals.append(abv)
    if 'abbr' in request.args:
        abbr = request.args['abbr']
        vals.append(abbr)
    if 'results' in request.args:
        try:
            # Was request.args['abbr'], which ignored the requested result count.
            results = int(request.args['results'])
        except Exception as e:
            return str(e)
    if len(vals) == 0:
        return jsonify(
            'Usage: supply a beer name, brewery, state abbreviation, style, or abv'
        )
    conn = db.establish_connection()
    cur = db.create_cursor(conn)
    vals.append(results)
    try:
        qbase = 'SELECT * FROM Beers WHERE TRUE'
        abbrq = '' if abbr is None else ' AND stateid=(SELECT stateid FROM States WHERE abbr=%s)'
        brewq = '' if brew is None else ' AND breweryid= ANY (SELECT breweryid FROM Breweries WHERE name ~* %s)'
        beerq = '' if name is None else ' AND name ~* %s'
        abvq = '' if abv is None else ' AND abv=%s'
        styleq = '' if style is None else ' AND style ~* %s'
        qend = ' ORDER BY ratings DESC LIMIT %s'
        cur.execute(qbase + beerq + brewq + styleq + abvq + abbrq + qend, vals)
        # if name != None:
        #     cur.execute(qbase + brewq + abbrq, vals)
        # elif beer != None:
        #     cur.execute(qbase + beerq + abbrq, vals)
        # else:
        #     cur.execute(qbase + 'TRUE' + abbrq, vals)
        results = cur.fetchall()
    except Exception as e:
        results = str(e)
    finally:
        db.close_connection(conn, cur)
    return jsonify(results)
def get_job(job_id):
    with create_cursor() as cursor:
        cursor.execute(
            "SELECT id, dataset_id, status, status_msg, result, created, updated "
            "FROM dataset_eval_jobs "
            "WHERE id = %s",
            (job_id,)
        )
        return dict(cursor.fetchone()) if cursor.rowcount > 0 else None
def list_incremental_dumps():
    """Get information about all created incremental dumps.

    Returns:
        List of (id, created) pairs ordered by dump identifier (newest first),
        or an empty list if there are no incremental dumps yet.
    """
    with create_cursor() as cursor:
        cursor.execute("SELECT id, created FROM incremental_dumps ORDER BY id DESC")
        return cursor.fetchall()
def _create_job(dataset_id):
    with create_cursor() as cursor:
        cursor.execute(
            "INSERT INTO dataset_eval_jobs (id, dataset_id, status) "
            "VALUES (uuid_generate_v4(), %s, %s) RETURNING id",
            (dataset_id, STATUS_PENDING)
        )
        job_id = cursor.fetchone()[0]
        commit()
    return job_id
def _copy_tables(location, start_time=None, end_time=None):
    """Copies all tables into separate files within a specified location (directory).

    You can also define a time frame that will be used during data selection.
    Files in a specified directory will only contain rows that have timestamps
    within the specified time frame. We assume that each table contains some
    sort of timestamp that can be used as a reference.
    """
    def generate_where(row_name, start_t=start_time, end_t=end_time):
        """This function generates SQL WHERE clause that can be used to select
        rows only within specified time frame using `row_name` as a reference.
        """
        if start_t or end_t:
            start_cond = "%s > '%s'" % (row_name, str(start_t)) if start_t else ""
            end_cond = "%s <= '%s'" % (row_name, str(end_t)) if end_t else ""
            if start_t and end_t:
                return "WHERE %s AND %s" % (start_cond, end_cond)
            else:
                return "WHERE %s%s" % (start_cond, end_cond)
        else:
            return ""

    with create_cursor() as cursor:

        # lowlevel
        with open(os.path.join(location, "lowlevel"), "w") as f:
            logging.info(" - Copying table lowlevel...")
            cursor.copy_to(f, "(SELECT %s FROM lowlevel %s)" %
                           (", ".join(_TABLES["lowlevel"]), generate_where("submitted")))

        # highlevel
        with open(os.path.join(location, "highlevel"), "w") as f:
            logging.info(" - Copying table highlevel...")
            cursor.copy_to(f, "(SELECT %s FROM highlevel %s)" %
                           (", ".join(_TABLES["highlevel"]), generate_where("submitted")))

        # highlevel_json
        with open(os.path.join(location, "highlevel_json"), "w") as f:
            logging.info(" - Copying table highlevel_json...")
            query = "SELECT %s FROM highlevel_json WHERE id IN (SELECT data FROM highlevel %s)" \
                    % (", ".join(_TABLES["highlevel_json"]), generate_where("submitted"))
            cursor.copy_to(f, "(%s)" % query)

        # statistics
        with open(os.path.join(location, "statistics"), "w") as f:
            logging.info(" - Copying table statistics...")
            cursor.copy_to(f, "(SELECT %s FROM statistics %s)" %
                           (", ".join(_TABLES["statistics"]), generate_where("collected")))

        # incremental_dumps
        with open(os.path.join(location, "incremental_dumps"), "w") as f:
            logging.info(" - Copying table incremental_dumps...")
            cursor.copy_to(f, "(SELECT %s FROM incremental_dumps %s)" %
                           (", ".join(_TABLES["incremental_dumps"]), generate_where("created")))
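# Illustrative output of generate_where() under a few assumptions about the
# time-frame arguments (the datetime values below are made up):
#
#   generate_where("submitted")   # both start_time and end_time set
#   # -> "WHERE submitted > '2015-01-01 00:00:00' AND submitted <= '2015-02-01 00:00:00'"
#
#   generate_where("submitted")   # only start_time set
#   # -> "WHERE submitted > '2015-01-01 00:00:00'"
#
#   generate_where("submitted")   # neither set
#   # -> ""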
def get_by_mb_id(musicbrainz_id):
    """Get user with a specified MusicBrainz ID."""
    with create_cursor() as cursor:
        cursor.execute(
            'SELECT id, created, musicbrainz_id '
            'FROM "user" '
            'WHERE LOWER(musicbrainz_id) = LOWER(%s)',
            (musicbrainz_id,)
        )
        row = cursor.fetchone()
    return dict(row) if row else None
def get_next_pending_job():
    with create_cursor() as cursor:
        cursor.execute(
            "SELECT id, dataset_id, status, status_msg, result, created, updated "
            "FROM dataset_eval_jobs "
            "WHERE status = %s "
            "ORDER BY created ASC "
            "LIMIT 1",
            (STATUS_PENDING,)
        )
        return dict(cursor.fetchone()) if cursor.rowcount > 0 else None
def get_all_bids():
    conn = db.establish_connection()
    cur = db.create_cursor(conn)
    out = []
    try:
        cur.execute('SELECT BreweryID FROM Breweries')
        for bid in cur:
            out.append(bid[0])
    except Exception as e:
        logger.info(e)
    finally:
        db.close_connection(conn, cur)
    return out
def get_blinks_from_bid(bid):
    conn = db.establish_connection()
    cur = db.create_cursor(conn)
    out = []
    try:
        cur.execute('SELECT Link FROM Beers WHERE BreweryID=%s', (bid, ))
        for link in cur:
            out.append(link[0])
    except Exception as e:
        logger.info(e)
    finally:
        db.close_connection(conn, cur)
    return out
def get_summary_data(mbid, offset=0):
    """Fetches the low-level and high-level features for the specified MBID.

    Args:
        offset: Offset can be specified if you need to get summary for a
            different submission. They are ordered by creation time.

    Returns:
        Dictionary with low-level data ("lowlevel" key) for the specified MBID
        and, if it has been calculated, high-level data ("highlevel" key).
    """
    summary = {}
    mbid = str(mbid)
    with db.create_cursor() as cursor:
        cursor.execute(
            "SELECT id, data "
            "FROM lowlevel "
            "WHERE mbid = %s "
            "ORDER BY submitted "
            "OFFSET %s",
            (mbid, offset)
        )
        if not cursor.rowcount:
            raise db.exceptions.NoDataFoundException("Can't find low-level data for this recording.")

        ll_row_id, lowlevel = cursor.fetchone()
        if 'artist' not in lowlevel['metadata']['tags']:
            lowlevel['metadata']['tags']['artist'] = ["[unknown]"]
        if 'release' not in lowlevel['metadata']['tags']:
            lowlevel['metadata']['tags']['release'] = ["[unknown]"]
        if 'title' not in lowlevel['metadata']['tags']:
            lowlevel['metadata']['tags']['title'] = ["[unknown]"]

        lowlevel['metadata']['audio_properties']['length_formatted'] = \
            time.strftime("%M:%S", time.gmtime(lowlevel['metadata']['audio_properties']['length']))

        summary['lowlevel'] = lowlevel

        cursor.execute(
            "SELECT highlevel_json.data "
            "FROM highlevel, highlevel_json "
            "WHERE highlevel.id = %s "
            "  AND highlevel.data = highlevel_json.id "
            "  AND highlevel.mbid = %s",
            (ll_row_id, mbid)
        )
        if cursor.rowcount:
            summary['highlevel'] = cursor.fetchone()[0]

    return summary
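# Usage sketch (illustrative; the MBID below is made up and must have low-level
# data stored for the call to succeed):
#
#   summary = get_summary_data("770cc467-8dde-4d22-bc4c-a42f91e7515e")
#   summary["lowlevel"]["metadata"]["tags"]["artist"]   # ["[unknown]"] if untagged
#   summary.get("highlevel")                            # present only if high-level data was computed
#
#   # Second submission (ordered by submission time) for the same MBID:
#   older = get_summary_data("770cc467-8dde-4d22-bc4c-a42f91e7515e", offset=1)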
def _any_new_data(from_time):
    """Checks if there's any new data since specified time in tables that
    support incremental dumps.

    Returns:
        True if there is new data in one of tables that support incremental
        dumps, False if there is no new data there.
    """
    with create_cursor() as cursor:
        cursor.execute("SELECT count(*) FROM lowlevel WHERE submitted > %s", (from_time,))
        lowlevel_count = cursor.fetchone()[0]
        cursor.execute("SELECT count(*) FROM highlevel WHERE submitted > %s", (from_time,))
        highlevel_count = cursor.fetchone()[0]
    return lowlevel_count > 0 or highlevel_count > 0
def drop_tables(self):
    with create_cursor() as cursor:
        # TODO(roman): See if there's a better way to drop all tables.
        cursor.execute('DROP TABLE IF EXISTS highlevel_json CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS highlevel CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS lowlevel CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS statistics CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS incremental_dumps CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS dataset_eval_jobs CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS dataset_class_member CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS dataset_class CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS dataset CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS "user" CASCADE;')
    commit()
def get_by_user_id(user_id, public_only=True):
    """Get datasets created by a specified user.

    Returns:
        List of dictionaries with dataset details.
    """
    with create_cursor() as cursor:
        where = "WHERE author = %s"
        if public_only:
            where += " AND public = TRUE"
        cursor.execute("SELECT id, name, description, author, created "
                       "FROM dataset " + where,
                       (user_id,))
        return [dict(row) for row in cursor.fetchall()]
def _get_classes(dataset_id):
    with create_cursor() as cursor:
        cursor.execute(
            "SELECT id, name, description "
            "FROM dataset_class "
            "WHERE dataset = %s",
            (dataset_id,)
        )
        rows = cursor.fetchall()
    classes = []
    for row in rows:
        row = dict(row)
        row["recordings"] = _get_recordings_in_class(row["id"])
        classes.append(row)
    return classes
def load_low_level(mbid, offset=0):
    """Load low-level data for a given MBID."""
    with db.create_cursor() as cursor:
        cursor.execute(
            "SELECT data::text "
            "FROM lowlevel "
            "WHERE mbid = %s "
            "ORDER BY submitted "
            "OFFSET %s",
            (str(mbid), offset)
        )
        if not cursor.rowcount:
            raise db.exceptions.NoDataFoundException
        row = cursor.fetchone()
        return row[0]
def load_high_level(mbid, offset=0):
    """Load high-level data for a given MBID."""
    with db.create_cursor() as cursor:
        cursor.execute(
            "SELECT hlj.data::text "
            "FROM highlevel hl "
            "JOIN highlevel_json hlj "
            "ON hl.data = hlj.id "
            "WHERE mbid = %s "
            "ORDER BY submitted "
            "OFFSET %s",
            (str(mbid), offset)
        )
        if not cursor.rowcount:
            raise db.exceptions.NoDataFoundException
        return cursor.fetchone()[0]
def get_stats():
    stats_keys = ["lowlevel-lossy", "lowlevel-lossy-unique",
                  "lowlevel-lossless", "lowlevel-lossless-unique"]
    # TODO: Port this to new implementation:
    stats = cache._mc.get_multi(stats_keys, key_prefix="ac-num-")
    last_collected = cache.get('last-collected')

    # Recalculate everything together, always.
    if sorted(stats_keys) != sorted(stats.keys()) or last_collected is None:
        stats_parameters = dict([(a, 0) for a in stats_keys])

        with create_cursor() as cursor:
            cursor.execute("SELECT now() as now, collected FROM statistics ORDER BY collected DESC LIMIT 1")
            update_db = False
            if cursor.rowcount > 0:
                (now, last_collected) = cursor.fetchone()
            if cursor.rowcount == 0 or now - last_collected > datetime.timedelta(minutes=59):
                update_db = True

            cursor.execute("SELECT lossless, count(*) FROM lowlevel GROUP BY lossless")
            for row in cursor.fetchall():
                if row[0]:
                    stats_parameters['lowlevel-lossless'] = row[1]
                if not row[0]:
                    stats_parameters['lowlevel-lossy'] = row[1]

            cursor.execute("SELECT lossless, count(*) FROM (SELECT DISTINCT ON (mbid) mbid, lossless "
                           "FROM lowlevel ORDER BY mbid, lossless DESC) q GROUP BY lossless;")
            for row in cursor.fetchall():
                if row[0]:
                    stats_parameters['lowlevel-lossless-unique'] = row[1]
                if not row[0]:
                    stats_parameters['lowlevel-lossy-unique'] = row[1]

            if update_db:
                for key, value in six.iteritems(stats_parameters):
                    cursor.execute("INSERT INTO statistics (collected, name, value) "
                                   "VALUES (now(), %s, %s) RETURNING collected", (key, value))
                commit()

            cursor.execute("SELECT now()")
            last_collected = cursor.fetchone()[0]

        value = stats_parameters

        # TODO: Port this to new implementation:
        cache._mc.set_multi(stats_parameters, key_prefix="ac-num-", time=STATS_CACHE_TIMEOUT)
        cache.set('last-collected', last_collected, time=STATS_CACHE_TIMEOUT)
    else:
        value = stats

    return value, last_collected
def get_beers():
    if 'id' in request.args:
        id = request.args['id']
    else:
        return "No brewery id provided. You fool."
    conn = db.establish_connection()
    cur = db.create_cursor(conn)
    out = ""
    try:
        cur.execute('SELECT * FROM Beers WHERE BreweryId=%s', (id, ))
        for item in cur:
            out += "<p>" + str(item) + "</p>"
    except Exception as e:
        out = "<h1>Here's an error</h1><p>" + str(e) + "</p>"
    finally:
        # conn.commit()
        db.close_connection(conn, cur)
    return out
def get_last_submitted_recordings():
    last_submitted_data = cache.get('last-submitted-data')
    if not last_submitted_data:
        with create_cursor() as cursor:
            cursor.execute("""SELECT mbid,
                                     data->'metadata'->'tags'->'artist'->>0,
                                     data->'metadata'->'tags'->'title'->>0
                              FROM lowlevel
                              ORDER BY id DESC
                              LIMIT 5 OFFSET 10""")
            last_submitted_data = cursor.fetchall()
        last_submitted_data = [
            (r[0], r[1], r[2]) for r in last_submitted_data if r[1] and r[2]
        ]
        cache.set('last-submitted-data', last_submitted_data, time=LAST_MBIDS_CACHE_TIMEOUT)

    return last_submitted_data
def get_jobs_for_dataset(dataset_id):
    """Get a list of evaluation jobs for the specified dataset.

    Args:
        dataset_id: UUID of the dataset.

    Returns:
        List of evaluation jobs (dicts) for the dataset. Ordered by creation
        time (oldest job first).
    """
    with create_cursor() as cursor:
        cursor.execute(
            "SELECT id, dataset_id, status, status_msg, result, created, updated "
            "FROM dataset_eval_jobs "
            "WHERE dataset_id = %s "
            "ORDER BY created ASC",
            (dataset_id,)
        )
        return [dict(j) for j in cursor.fetchall()]
def evaluate_dataset(dataset_id):
    """Add dataset into evaluation queue.

    Args:
        dataset_id: ID of the dataset that needs to be added into the list of
            jobs.

    Returns:
        ID of the newly created evaluation job.
    """
    with create_cursor() as cursor:
        cursor.execute(
            "SELECT count(*) FROM dataset_eval_jobs WHERE dataset_id = %s AND status IN %s",
            (dataset_id, (STATUS_PENDING, STATUS_RUNNING))
        )
        if cursor.fetchone()[0] > 0:
            raise JobExistsException
    validate_dataset(db.dataset.get(dataset_id))
    return _create_job(dataset_id)
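# Usage sketch (illustrative):
#
#   try:
#       job_id = evaluate_dataset(dataset_id)
#   except JobExistsException:
#       # This dataset already has a pending or running evaluation job.
#       pass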
def get(id):
    """Get dataset with a specified ID.

    Returns:
        Dictionary with dataset details if it has been found, None otherwise.
    """
    with create_cursor() as cursor:
        cursor.execute(
            "SELECT id, name, description, author, created, public "
            "FROM dataset "
            "WHERE id = %s",
            (str(id),)
        )
        if cursor.rowcount > 0:
            row = dict(cursor.fetchone())
            row["classes"] = _get_classes(row["id"])
            return row
        else:
            return None
def get_state():
    if 'ab' in request.args:
        ab = request.args['ab']
    else:
        ab = '__'
    conn = db.establish_connection()
    cur = db.create_cursor(conn)
    out = ""
    try:
        cur.execute('SELECT * FROM States WHERE Abbr LIKE %s', (ab, ))
        for item in cur:
            out += "<p>" + str(item) + "</p><a href='/brewery?sid=" + str(
                item[2]) + "'>BREWERIES</a>"
    except Exception as e:
        out = "<h1>Here's an error</h1><p>" + str(e) + "</p>"
    finally:
        # conn.commit()
        db.close_connection(conn, cur)
    return out
def main():
    ts = time()
    s = requests.session()
    f = open("test.html", "a")
    # download_dir = setup_download_dir()
    # links = []
    login(s)
    bids = get_all_bids()
    # for state in states:
    #     l = get_links(state, s)
    #     links += l
    #     p_to_f(str(l))
    # print(links)
    # logger.info("Total brew and beer data: %s", download_link(links[0], s))

    # Create a queue to communicate with the worker threads
    queue = Queue()
    # Create 8 worker threads
    bc = 0
    for x in range(8):
        conn = db.establish_connection()
        cur = db.create_cursor(conn)
        worker = DownloadWorker(queue, cur, conn, bc)
        # Setting daemon to True will let the main thread exit even though the workers are blocking
        worker.daemon = True
        worker.start()
    # Put the tasks into the queue as a tuple
    for bid in bids:
        links = get_blinks_from_bid(bid)
        for link in links:
            # logger.info('Queueing {}'.format(link))
            queue.put((link, s))
    # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    # conn.commit()
    # db.close_connection(conn, cur)
    f.close()
    print(bids)
    print(ld)
    logging.info('Took %s', time() - ts)
def api_get_brewery():
    name = None
    abbr = None
    beer = None
    if 'name' in request.args:
        name = ".*" + request.args['name'] + ".*"
    if 'beer' in request.args:
        beer = ".*" + request.args['beer'] + ".*"
    if 'abbr' in request.args:
        abbr = request.args['abbr']
    results = None
    if name is None and abbr is None and beer is None:
        return jsonify(
            'Usage: supply a brewery name: ?name=brewname, and/or a state abbreviation: '
            '?abbr=stabbr, and/or a beer name: ?beer=bname'
        )
    conn = db.establish_connection()
    cur = db.create_cursor(conn)
    try:
        qbase = 'SELECT * FROM Breweries WHERE '
        abbrq = '' if abbr is None else ' AND stateid=(SELECT stateid FROM States WHERE abbr=%s)'
        beerq = '' if beer is None else 'breweryid= ANY (SELECT breweryid FROM Beers WHERE name ~* %s)'
        brewq = '' if name is None else 'name ~* %s'
        # Build the parameter list to match the query that is actually executed;
        # blindly appending every supplied value caused a placeholder/parameter
        # mismatch when both `name` and `beer` were given.
        if name is not None:
            params = [name] + ([abbr] if abbr is not None else [])
            cur.execute(qbase + brewq + abbrq, params)
        elif beer is not None:
            params = [beer] + ([abbr] if abbr is not None else [])
            cur.execute(qbase + beerq + abbrq, params)
        else:
            cur.execute(qbase + 'TRUE' + abbrq, [abbr])
        results = cur.fetchall()
    except Exception as e:
        results = str(e)
    finally:
        db.close_connection(conn, cur)
    return jsonify(results)
def get_statistics_data():
    with create_cursor() as cursor:
        cursor.execute(
            "SELECT name, array_agg(collected ORDER BY collected ASC) AS times,"
            " array_agg(value ORDER BY collected ASC) AS values "
            "FROM statistics "
            "GROUP BY name"
        )
        stats_key_map = {
            "lowlevel-lossy": "Lossy (all)",
            "lowlevel-lossy-unique": "Lossy (unique)",
            "lowlevel-lossless": "Lossless (all)",
            "lowlevel-lossless-unique": "Lossless (unique)"
        }
        ret = []
        total_unique = {"name": "Total (unique)", "data": {}}
        total_all = {"name": "Total (all)", "data": {}}
        for val in cursor:
            # Materialize the pairs so they can be iterated twice (zip() is a
            # single-use iterator on Python 3).
            pairs = list(zip([_make_timestamp(v) for v in val[1]], val[2]))
            ret.append({
                "name": stats_key_map.get(val[0], val[0]),
                "data": [[v[0], v[1]] for v in pairs]
            })
            second = {}
            if val[0] in ["lowlevel-lossy", "lowlevel-lossless"]:
                second = total_all
            elif val[0] in ["lowlevel-lossy-unique", "lowlevel-lossless-unique"]:
                second = total_unique
            for pair in pairs:
                if pair[0] in second['data']:
                    second['data'][pair[0]] = second['data'][pair[0]] + pair[1]
                else:
                    second['data'][pair[0]] = pair[1]
    total_unique['data'] = [[k, total_unique['data'][k]]
                            for k in sorted(total_unique['data'].keys())]
    total_all['data'] = [[k, total_all['data'][k]]
                         for k in sorted(total_all['data'].keys())]
    ret.extend([total_unique, total_all])
    return ret
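# Illustrative shape of the return value (display names come from stats_key_map
# and the totals are computed; the numbers and timestamp format below are made
# up for the example):
#
#   [
#       {"name": "Lossy (all)", "data": [[1420070400000, 100], [1420074000000, 120]]},
#       {"name": "Lossless (all)", "data": [[1420070400000, 50], [1420074000000, 60]]},
#       ...
#       {"name": "Total (unique)", "data": [[1420070400000, 130], ...]},
#       {"name": "Total (all)", "data": [[1420070400000, 150], ...]},
#   ]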
# NOTE: the start of this function was cut off in the source; the signature and
# the `prob1` line below are reconstructed from how the variables are used, and
# the name `combined_predict` is a placeholder.
def combined_predict(clf1, clf2, data):
    """Per sample, return the prediction of whichever classifier is more confident."""
    prob1 = clf1.predict_proba(data)
    prob2 = clf2.predict_proba(data)
    pre1 = clf1.predict(data)
    pre2 = clf2.predict(data)
    result = [None] * len(data)
    for i in range(len(prob1)):
        if max(prob1[i]) > max(prob2[i]):
            result[i] = pre1[i]
        else:
            result[i] = pre2[i]
    return result


if __name__ == "__main__":
    conn = establish_connection()
    cur = create_cursor(conn)
    data, style = grab_rand_data(cur, length)
    svd = TruncatedSVD(n_components=dim, n_iter=7, random_state=42)
    print(len(data))
    # print(vals)
    # print(len(data[999]))
    data = check_array(data, accept_sparse=True)
    # print("d1: ", data[0])
    svd.fit(data)
    d = svd.fit_transform(data)
    # print("d1: ", data[0])
    # DummyClassifier(strategy='most_frequent').fit(d, [styledict[s] for s in style])
    # classifier1 = MLPClassifier(random_state=1, max_iter=1000).fit(d, [styledict[s] for s in style])
    classifier = GradientBoostingClassifier(
        n_estimators=100, init=MLPClassifier(max_iter=1000)).fit(
            d, [styledict[s] for s in style])  # label argument was truncated; reconstructed from the commented-out call above