def move_user():
    for each_doc in mongo_user.find().sort('_id', direction=ASCENDING):
        item = dict()
        item['gen_time'] = each_doc.pop('_id').generation_time
        item['name'] = each_doc['name']
        item['credit'] = each_doc['credit'] if 'credit' in each_doc else 0
        item['password'] = each_doc['password'] if 'password' in each_doc else 0
        item['exp'] = each_doc['exp'] if 'exp' in each_doc else 0
        item['role'] = each_doc['role'] if 'role' in each_doc else 0
        if len(item['name']) > 45:
            print(item['name'])
            continue
        cursor.execute(INSERT_USER_SQL, item)
        cursor.execute(GET_USER_ID_SQL, (each_doc['name'], ))
        user_id = cursor.fetchone()['user_id']
        cursor.execute(DELETE_USER_FOCUS_VIDEO_SQL, (user_id, ))
        cursor.execute(DELETE_USER_FOCUS_AUTHOR_SQL, (user_id, ))
        if 'favoriteAid' in each_doc:
            for each_aid in each_doc['favoriteAid']:
                if each_aid is None or each_aid > 4294967295:
                    continue
                item = {}
                item['user_id'] = int(user_id)
                item['video_id'] = int(each_aid)
                cursor.execute(INSERT_USER_FOCUS_VIDEO_SQL, item)
        if 'favoriteMid' in each_doc:
            for each_mid in each_doc['favoriteMid']:
                if each_mid is None or each_mid > 4294967295:
                    continue
                item = {}
                item['user_id'] = int(user_id)
                item['author_id'] = int(each_mid)
                cursor.execute(INSERT_USER_FOCUS_AUTHOR_SQL, item)
def get_recommendation():
    if request.method == "POST":
        RATINGS_PATH = '/srv/movielens/ratings_matrix.npz'
        RATINGS_MATRIX = ratings_matrix.ratings_matrix(RATINGS_PATH)
        user_id = request.json["user_id"]
        sql_query = "SELECT DISTINCT movie_id FROM user_favorite WHERE user_id = %s"
        cursor.execute(sql_query, tuple([user_id]))
        movie_ids = cursor.fetchall()
        movie_list = []
        if len(movie_ids) == 0:
            result = {"recommended_movies": movie_list}
            return jsonify(result)
        liked = {}
        for movie_id in movie_ids:
            movie_id = movie_id[0]
            liked[movie_id] = 50
        print(liked)
        recommendations = get_matches(RATINGS_MATRIX, liked, NUM_RECOMMENDATIONS)
        print(recommendations)
        sql_query = "SELECT * FROM movie WHERE id = %s"
        for index in range(NUM_RECOMMENDATIONS):
            movie_id = recommendations[index][1]
            movie_id = RATINGS_MATRIX.imdb_id(movie_id)
            cursor.execute(sql_query, tuple([int(movie_id)]))
            movie_row = cursor.fetchone()
            movie = get_movie_details(movie_row)
            movie_list.append(movie)
        result = {"recommended_movies": movie_list}
        print(movie_list)
        return jsonify(result)
def get_movie_details(row):
    id = row[0]
    sql_query = ("SELECT person.name FROM person, person_junction "
                 "WHERE person.id = person_junction.person_id"
                 " AND person_junction.role = 'cast'"
                 " AND person_junction.movie_id = %s")
    cursor.execute(sql_query, tuple([id]))
    casts = cursor.fetchmany(size=4)
    actors = []
    for cast in casts:
        actors.append(cast[0])
    sql_query = "SELECT rating FROM ratings WHERE movie_id = %s"
    cursor.execute(sql_query, tuple([id]))
    rating = cursor.fetchone()[0]
    movie = {
        "id": row[0],
        "title": row[1],
        "year": row[2],
        "url": row[3],
        "plot": row[5][:200] + " ...",
        "genre": row[6],
        "language": row[7],
        "casts": actors,
        "rating": rating
    }
    return movie
def _fetch_posts(epoch, town, cache=True):
    p = r.pipeline()
    posts = r.smembers(f'posts:{town}:{epoch}')
    hot_factors = []
    if len(posts) != 0:
        # posts is a set in the redis cache. This is to (naively) support
        # concurrent caching of posts without worrying about duplicate posts
        posts = list(posts)
        for i in range(len(posts)):
            posts[i] = posts[i].decode('utf-8')
            p.get(f'post:{posts[i]}:hot_factor')
        hot_factors = [None if elem is None else int(elem) for elem in p.execute()]
        _calculate_hot_factors_batch(posts, hot_factors)
        return posts, hot_factors

    posts = []
    cursor.execute('SELECT creator, creationTime, votes, "views" \
                    FROM Posts WHERE town=? \
                    AND creationTime BETWEEN ? and ?',
                   town, epoch, epoch + TIME_BLOCK_SIZE - 1)
    row = cursor.fetchone()
    while row:
        post_id = _construct_post_id(row[0], row[1])
        posts.append(post_id)
        hf = calculate_hot_factor(row[1], row[2], row[3])
        hot_factors.append(hf)
        if cache:
            p.set(f'post:{post_id}:votes', row[2])
            p.set(f'post:{post_id}:views', row[3])
            p.set(f'post:{post_id}:hot_factor', hf)
            # force app to recalculate hot factor
            p.expire(f'post:{post_id}:hot_factor', HOT_FACTOR_EXPIRATION)
        row = cursor.fetchone()
    if cache:
        if len(posts) != 0:
            p.sadd(f'posts:{town}:{epoch}', *posts)
        p.execute()
    return posts, hot_factors
def get_setting(setting):
    # Get a setting from the database
    query = ("SELECT value FROM settings WHERE setting='%s' ORDER BY "
             "last_modified DESC LIMIT 1" % setting)
    cursor.execute(query)
    return cursor.fetchone()["value"]
def get_setting(setting):
    # Get a setting from the database
    query = ("SELECT value FROM settings WHERE setting=%s ORDER BY "
             "last_modified DESC LIMIT 1")
    cursor.execute(query, (setting, ))
    return cursor.fetchone()["value"]
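# A minimal, illustrative sketch (not part of the original modules) of why the
# parameterized get_setting() directly above is preferred over the
# string-formatted variant before it. The hostile value below is hypothetical.
hostile_setting = "start_date' OR '1'='1"

# String interpolation splices the value into the SQL text, so quotes inside the
# value rewrite the query itself (SQL injection):
print("SELECT value FROM settings WHERE setting='%s' "
      "ORDER BY last_modified DESC LIMIT 1" % hostile_setting)
# -> ... WHERE setting='start_date' OR '1'='1' ORDER BY ...

# With cursor.execute(query, (hostile_setting, )) the driver transmits the value
# separately from the SQL text and escapes it, so the query structure cannot change.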
def get_favorite():
    if request.method == "POST":
        user_id = request.json["user_id"]
        sql_query = "SELECT movie_id FROM user_favorite WHERE user_id = %s"
        cursor.execute(sql_query, tuple([user_id]))
        rows = cursor.fetchall()
        movie_list = []
        for movie_id in rows:
            movie_id = movie_id[0]
            sql_query = "SELECT * FROM movie WHERE id = %s"
            cursor.execute(sql_query, tuple([movie_id]))
            movie_row = cursor.fetchone()
            movie = get_movie_details(movie_row)
            movie_list.append(movie)
        result = {"favorite_movies": movie_list}
        return jsonify(result)
def find_user(cls, username):
    try:
        select_query = """SELECT id, username, pwd FROM users
                          WHERE username = %s"""
        cursor.execute(select_query, (username, ))
        row = cursor.fetchone()
        if row:
            return cls(row['id'], row['username'], row['pwd'])
    except (Exception, psycopg2.Error) as error:
        connection.rollback()
        return {
            'status': 500,
            'message': json.dumps(error, default=str)
        }, 500
def get(self, order_id):
    get_query = """SELECT * FROM parcels WHERE id = %s"""
    cursor.execute(get_query, (order_id, ))
    one_parcel = cursor.fetchone()
    if one_parcel is not None:
        return {
            'status': 200,
            'data': json.dumps(one_parcel, default=str, separators=(',', ': '))
        }, 200
    else:
        return {
            'message': "Parcel with id '{}' does not exist.".format(order_id)
        }, 404
def post(self):
    data = ParcelsList.parser.parse_args()
    create_parcels_table()
    insert_query = """INSERT INTO parcels (placedBy, weight, weightmetric, sentOn,
                      deliveredOn, status, source, destination, currentlocation)
                      VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) RETURNING id;"""
    record_to_insert = list(data.values())
    cursor.execute(insert_query, record_to_insert)
    cursor.execute('SELECT LASTVAL()')
    row_id = cursor.fetchone()['lastval']
    connection.commit()
    return {
        'status': 201,
        'data': [{
            'id': row_id,
            'message': 'order created'
        }]
    }, 201
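# A minimal sketch (not part of the original resource class) of reading the new
# row's id straight from the "RETURNING id;" clause used in post() above, instead
# of issuing a second SELECT LASTVAL() round trip. The helper name is hypothetical;
# it assumes the same psycopg2-style connection and cursor objects.
def _insert_parcel_returning_id(cursor, connection, insert_query, record_to_insert):
    cursor.execute(insert_query, record_to_insert)
    row = cursor.fetchone()  # the single row produced by RETURNING id
    row_id = row['id'] if isinstance(row, dict) else row[0]  # dict-style or tuple cursor
    connection.commit()
    return row_id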
def _calculate_hot_factors_batch(posts, hot_factors):
    # posts at these indices don't have hot factors
    # in cache (expired or never calculated)
    indices = []
    p = r.pipeline()
    for i in range(len(posts)):
        post = posts[i]
        if hot_factors[i] is None:
            indices.append(i)
            p.get(f'post:{post}:votes')
            p.get(f'post:{post}:views')
    stats = p.execute()

    inputs = []
    for i in range(0, len(stats), 2):
        post = posts[indices[i//2]]
        creator, creation_time = _deconstruct_post_id(post)
        # if votes or views doesn't exist
        if stats[i] is None or stats[i+1] is None:
            # kinda bothers me that there is no way to "batch" SELECTs
            cursor.execute('SELECT votes, "views" FROM Posts \
                            WHERE creator=? AND creationTime=?',
                           creator, creation_time)
            row = cursor.fetchone()
            stats[i], stats[i+1] = row[0], row[1]
            p.set(f'post:{post}:votes', row[0])
            p.set(f'post:{post}:views', row[1])
        else:
            stats[i], stats[i+1] = int(stats[i]), int(stats[i+1])
        hf = calculate_hot_factor(creation_time, stats[i], stats[i+1])
        hot_factors[indices[i//2]] = hf
        p.set(f'post:{post}:hot_factor', hf)
        # reuse hot factor up to HOT_FACTOR_EXPIRATION
        p.expire(f'post:{post}:hot_factor', HOT_FACTOR_EXPIRATION)
    p.execute()
def fill_empty_affiliations():

    # When a record is added, it has no affiliation data. Also, when an affiliation
    # mapping is changed via the UI, affiliation data will be set to NULL. This
    # function finds any records with NULL affiliation data and fills them.

    update_status('Filling empty affiliations')
    log_activity('Info', 'Filling empty affiliations')

    # Process any changes to the affiliations or aliases, and set any existing
    # entries in analysis_data to NULL so they are filled properly.

    # First, get the time we started fetching since we'll need it later
    cursor.execute("SELECT current_timestamp(6) as fetched")
    affiliations_fetched = cursor.fetchone()['fetched']

    # Now find the last time we worked on affiliations, to figure out what's new
    affiliations_processed = get_setting('affiliations_processed')

    get_changed_affiliations = ("SELECT domain FROM affiliations WHERE "
                                "last_modified >= %s")
    cursor_people.execute(get_changed_affiliations, (affiliations_processed, ))
    changed_affiliations = list(cursor_people)

    # Process any affiliations which changed since we last checked
    for changed_affiliation in changed_affiliations:

        log_activity('Debug', 'Resetting affiliation for %s' %
                     changed_affiliation['domain'])

        set_author_to_null = ("UPDATE analysis_data SET author_affiliation = NULL "
                              "WHERE author_email = %s")
        cursor.execute(set_author_to_null, (changed_affiliation['domain'], ))
        db.commit()

        set_committer_to_null = ("UPDATE analysis_data SET committer_affiliation = NULL "
                                 "WHERE committer_email = %s")
        cursor.execute(set_committer_to_null, (changed_affiliation['domain'], ))
        db.commit()

    # Update the last fetched date, so we know where to start next time.
    update_affiliations_date = ("UPDATE settings SET value=%s "
                                "WHERE setting = 'affiliations_processed'")
    cursor.execute(update_affiliations_date, (affiliations_fetched, ))
    db.commit()

    # On to the aliases, now

    # First, get the time we started fetching since we'll need it later
    cursor.execute("SELECT current_timestamp(6) as fetched")
    aliases_fetched = cursor.fetchone()['fetched']

    # Now find the last time we worked on aliases, to figure out what's new
    aliases_processed = get_setting('aliases_processed')

    get_changed_aliases = ("SELECT alias FROM aliases WHERE "
                           "last_modified >= %s")
    cursor_people.execute(get_changed_aliases, (aliases_processed, ))
    changed_aliases = list(cursor_people)

    # Process any aliases which changed since we last checked
    for changed_alias in changed_aliases:

        log_activity('Debug', 'Resetting affiliation for %s' %
                     changed_alias['alias'])

        set_author_to_null = ("UPDATE analysis_data SET author_affiliation = NULL "
                              "WHERE author_raw_email = %s")
        cursor.execute(set_author_to_null, (changed_alias['alias'], ))
        db.commit()

        set_committer_to_null = ("UPDATE analysis_data SET committer_affiliation = NULL "
                                 "WHERE committer_raw_email = %s")
        cursor.execute(set_committer_to_null, (changed_alias['alias'], ))
        db.commit()

        reset_author = ("UPDATE analysis_data "
                        "SET author_email = %s "
                        "WHERE author_raw_email = %s")
        cursor.execute(reset_author,
                       (discover_alias(changed_alias['alias']), changed_alias['alias']))
        db.commit()

        reset_committer = ("UPDATE analysis_data "
                           "SET committer_email = %s "
                           "WHERE committer_raw_email = %s")
        cursor.execute(reset_committer,
                       (discover_alias(changed_alias['alias']), changed_alias['alias']))
        db.commit()

    # Update the last fetched date, so we know where to start next time.
    update_aliases_date = ("UPDATE settings SET value=%s "
                           "WHERE setting = 'aliases_processed'")
    cursor.execute(update_aliases_date, (aliases_fetched, ))
    db.commit()

    # Now rebuild the affiliation data

    working_author = get_setting('working_author').replace("'", "\\'")

    if working_author != 'done':
        log_activity('Error', 'Trimming author data in affiliations: %s' %
                     working_author)
        trim_author(working_author)

    # Find any authors with NULL affiliations and fill them
    find_null_authors = ("SELECT DISTINCT author_email AS email, "
                         "MIN(author_date) AS earliest "
                         "FROM analysis_data "
                         "WHERE author_affiliation IS NULL "
                         "GROUP BY author_email")
    cursor.execute(find_null_authors)
    null_authors = list(cursor)

    log_activity('Debug', 'Found %s authors with NULL affiliation' %
                 len(null_authors))

    for null_author in null_authors:

        email = null_author['email'].replace("'", "\\'")

        store_working_author(email)
        discover_null_affiliations('author', email)

    store_working_author('done')

    # Find any committers with NULL affiliations and fill them
    find_null_committers = ("SELECT DISTINCT committer_email AS email, "
                            "MIN(committer_date) AS earliest "
                            "FROM analysis_data "
                            "WHERE committer_affiliation IS NULL "
                            "GROUP BY committer_email")
    cursor.execute(find_null_committers)
    null_committers = list(cursor)

    log_activity('Debug', 'Found %s committers with NULL affiliation' %
                 len(null_committers))

    for null_committer in null_committers:

        email = null_committer['email'].replace("'", "\\'")

        store_working_author(email)
        discover_null_affiliations('committer', email)

    # Now that we've matched as much as possible, fill the rest as (Unknown)
    fill_unknown_author = ("UPDATE analysis_data "
                           "SET author_affiliation = '(Unknown)' "
                           "WHERE author_affiliation IS NULL")
    cursor.execute(fill_unknown_author)
    db.commit()

    fill_unknown_committer = ("UPDATE analysis_data "
                              "SET committer_affiliation = '(Unknown)' "
                              "WHERE committer_affiliation IS NULL")
    cursor.execute(fill_unknown_committer)
    db.commit()

    store_working_author('done')

    log_activity('Info', 'Filling empty affiliations (complete)')
def analysis():

    # Run the analysis by looping over all active repos. For each repo, we retrieve
    # the list of commits which lead to HEAD. If any are missing from the database,
    # they are filled in. Then we check to see if any commits in the database are
    # not in the list of parents, and prune them out.
    #
    # We also keep track of the last commit to be processed, so that if the analysis
    # is interrupted (possibly leading to partial data in the database for the
    # commit being analyzed at the time) we can recover.

    update_status('Running analysis')
    log_activity('Info', 'Beginning analysis')

    start_date = get_setting('start_date')

    repo_list = "SELECT id,projects_id,path,name FROM repos WHERE status='Active'"
    cursor.execute(repo_list)
    repos = list(cursor)

    for repo in repos:

        update_analysis_log(repo['id'], 'Beginning analysis')
        log_activity('Verbose', 'Analyzing repo: %s (%s)' %
                     (repo['id'], repo['name']))

        # First we check to see if the previous analysis didn't complete
        get_status = ("SELECT working_commit FROM repos WHERE id=%s")
        cursor.execute(get_status, (repo['id'], ))
        working_commit = cursor.fetchone()['working_commit']

        # If there's a commit still there, the previous run was interrupted and
        # the commit data may be incomplete. It should be trimmed, just in case.
        if working_commit:
            trim_commit(repo['id'], working_commit)
            store_working_commit(repo['id'], '')

        # Start the main analysis
        update_analysis_log(repo['id'], 'Collecting data')

        repo_loc = ('%s%s/%s%s/.git' % (repo_base_directory,
                                        repo["projects_id"],
                                        repo["path"],
                                        repo["name"]))

        # Grab the parents of HEAD
        parents = subprocess.Popen(["git --git-dir %s log --ignore-missing "
                                    "--pretty=format:'%%H' --since=%s" %
                                    (repo_loc, start_date)],
                                   stdout=subprocess.PIPE, shell=True)
        parent_commits = set(parents.stdout.read().split(os.linesep))

        # If there are no commits in the range, we still get a blank entry in
        # the set. Remove it, as it messes with the calculations
        if '' in parent_commits:
            parent_commits.remove('')

        # Grab the existing commits from the database
        existing_commits = set()
        find_existing = ("SELECT DISTINCT commit FROM analysis_data WHERE repos_id=%s")
        cursor.execute(find_existing, (repo['id'], ))

        for commit in list(cursor):
            existing_commits.add(commit['commit'])

        # Find missing commits and add them
        missing_commits = parent_commits - existing_commits
        log_activity('Debug', 'Commits missing from repo %s: %s' %
                     (repo['id'], len(missing_commits)))

        for commit in missing_commits:
            store_working_commit(repo['id'], commit)
            analyze_commit(repo['id'], repo_loc, commit)
            store_working_commit(repo['id'], '')

        update_analysis_log(repo['id'], 'Data collection complete')
        update_analysis_log(repo['id'], 'Beginning to trim commits')

        # Find commits which are out of the analysis range
        trimmed_commits = existing_commits - parent_commits
        log_activity('Debug', 'Commits to be trimmed from repo %s: %s' %
                     (repo['id'], len(trimmed_commits)))

        for commit in trimmed_commits:
            trim_commit(repo['id'], commit)

        update_analysis_log(repo['id'], 'Commit trimming complete')
        update_analysis_log(repo['id'], 'Analysis complete')

    log_activity('Info', 'Running analysis (complete)')
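# A tiny worked example (hypothetical commit hashes) of the set arithmetic used in
# analysis() above: commits reachable from HEAD but absent from the database are
# analyzed, and database commits that fell out of the range are trimmed.
parent_commits = {'aaa111', 'bbb222', 'ccc333'}
existing_commits = {'bbb222', 'ddd444'}
print(parent_commits - existing_commits)   # {'aaa111', 'ccc333'} -> analyze_commit()
print(existing_commits - parent_commits)   # {'ddd444'}           -> trim_commit()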
import csv

from db import cursor
from haishoku.haishoku import Haishoku  # used for Haishoku.getPalette below

# Note: `face` and `color` are assumed to be dicts defined elsewhere in the
# original script; both are read and updated below.

s = set()
c = csv.reader(
    open('E:/source/bili_history_sim/resimed.csv', 'r', encoding="utf-8-sig"))
for each in c:
    s.add(each[0])

with open('./get_data/face.py', 'w', encoding="utf-8-sig") as f:
    f.writelines('face = ' + str(face))

authors = []
for each_author in s:
    authors.append(each_author)
    if each_author not in face:
        cursor.execute("""
            SELECT * FROM author WHERE name=%s
        """, (each_author, ))
        author_data = cursor.fetchone()
        face[each_author] = author_data['face']

with open('./get_data/face.py', 'w', encoding="utf-8-sig") as f:
    f.writelines('face = ' + str(face))

for each_author in face:
    if each_author in color:
        continue
    if face[each_author][-3:] == 'gif' or each_author == '开眼视频App':
        color[each_author] = '#000000'
    else:
        color_list = Haishoku.getPalette(face[each_author])
        color_list = sorted(color_list,
                            key=lambda x: x[1][0] + x[1][1] + x[1][2])
        color[each_author] = 'rgb' + \