def get_candidates(self, X_product_init, X_opening):
    """Gets potential duplicate candidates from the database.

    Args:
        X_product_init: Product of the bug report.
        X_opening: Timestamp when the bug report was created.

    TODO: Make it work for bug reports not in the db (unknown id).
    """
    conn = connect_db()
    query = '''
        SELECT dof.id AS duplicate_of_id
             , dof.short_desc_init AS dof_short_desc_init
             , dof.desc_init AS dof_desc_init
             , dof.product_init AS dof_product_init
             , dof.component_init AS dof_component_init
             , dof.reporter AS dof_reporter
             , dof.op_sys_init AS dof_op_sys_init
        FROM final dof
        WHERE 1 = 1
            -- original should be open when duplicate is created
            AND '{}' > dof.opening
            AND '{}' < dof.closing
            -- original and duplicate assumed to be in the same product
            AND '{}' = dof.product_init
        --ORDER BY dof.id
        --LIMIT 10
    '''.format(X_opening, X_opening, X_product_init)
    result = pd.read_sql_query(query, con=conn)
    conn.close()
    return result
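# A minimal hardening sketch for the query above, assuming a psycopg2-style
# driver (the rest of this pipeline passes %s-style parameters to execute):
# letting the driver bind the values avoids the quoting/injection problems
# str.format() has with raw timestamps and product names.
#
#     query = '''
#         SELECT dof.id AS duplicate_of_id, ...
#         FROM final dof
#         WHERE %s > dof.opening
#           AND %s < dof.closing
#           AND %s = dof.product_init
#     '''
#     result = pd.read_sql_query(query, con=conn,
#                                params=[X_opening, X_opening, X_product_init])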
def create_table(conn, table):
    query = ""  # initialized so the except branch cannot raise NameError
    try:
        if conn == "new":
            conn = util.connect_db()
        this_table = table_data.ix[table_data.table == table, :]
        this_table.reset_index(drop=True, inplace=True)
        query = "CREATE TABLE {}(".format(table)
        for ix in xrange(this_table.shape[0]):
            query = query + str(this_table.field[ix]) + " " + str(
                this_table.type[ix]) + ", "
        query = query[:-2] + ")"  # drops trailing comma, adds closing parenthesis
        drop_command = "DROP TABLE IF EXISTS {}".format(table)
        with conn:
            cur = conn.cursor()
            cur.execute(drop_command)
            cur.execute(query)
        conn.commit()
        conn.close()
        return query
    except Exception as e:
        return str(e) + query
def ratings_report(cond, kind):
    tstamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    log_dir = 'ratings/REPORT__{t}.log'.format(t=tstamp)
    conn = util.connect_db()
    query = 'select * from photo_ratings_{}_{}'.format(cond, kind)
    db = pd.read_sql_query(query, conn)
    gb = db.groupby('url').count()
    log_msgs = ['Photos in {}-{} with NO ratings: {}'.format(
        cond, kind, np.sum(gb['rater_id'] == 0))]
    max_ratings_on_record = 12
    for i in range(max_ratings_on_record):
        ct = np.sum(gb['rater_id'] > i)
        log_msgs.append('Photos with more than {} ratings: {}'.format(i, ct))
    try:
        util.log(log_msgs, log_dir, full_path_included=True)
        return '<br />'.join(log_msgs)
    except Exception as e:
        return str(e)
def checkUserExist(userName):
    conn = util.connect_db(db_connect_command)
    cursor = conn.cursor()
    # parameterized: the user value was scrubbed out of the original query
    # string, and binding it avoids SQL injection anyway
    sql_query = "select userId from ulist where userName = %s"
    cursor.execute(sql_query, (userName,))
    rows = cursor.fetchall()  # fetch before closing the connection
    conn.close()
    return len(rows) > 0
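# Alternative sketch, assuming PostgreSQL/psycopg2 as used elsewhere with
# db_connect_command: EXISTS short-circuits on the first match instead of
# materializing every matching row.
#
#     cursor.execute("select exists(select 1 from ulist where userName = %s)",
#                    (userName,))
#     user_exists = cursor.fetchone()[0]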
def GET(self, gid):
    db = util.connect_db()
    row = util.select_one(db, gid)
    fields = copy.deepcopy(config.fields)
    for item in fields:
        item[-1] = row[item[0]]
    del db
    return render.editor(fields, '/edit/' + gid, web.input(msg='').msg)
def insertUser(userName, password, firstName, lastName):
    conn = util.connect_db(db_connect_command)
    cursor = conn.cursor()
    # parameterized instead of string concatenation: safe against SQL
    # injection and quoting errors
    sql_query = ("insert into ulist (userid, firstname, lastname, pwd) "
                 "values (%s, %s, %s, %s)")
    print("before execute " + sql_query)
    cursor.execute(sql_query, (userName, firstName, lastName, password))
    conn.commit()
    conn.close()
def eval_single_dupl(self, X):
    """Returns the position of the actual duplicate in search results.

    Args:
        X (dataframe): Bug report in database that is a duplicate.

    Returns:
        tuple (integer, integer): Position of actual duplicate, number of
            candidates.
    """
    conn = connect_db()
    cur = conn.cursor()
    query = '''
        SELECT f.id
             , f.short_desc_init
             , f.desc_init
             , f.product_init
             , f.component_init
             , f.reporter
             , f.op_sys_init
             , f.opening
        FROM final f
        WHERE f.id = {}
    '''.format(X[1]['id'])
    X_info = pd.read_sql_query(query, con=conn)
    X_candidates_tmp = self.get_candidates(X_info.iloc[0]['product_init'],
                                           X_info.iloc[0]['opening'])

    # cross join bug report with all candidates via a constant key
    X_info['tmp_key'] = 1
    X_candidates_tmp['tmp_key'] = 1
    X_candidates = pd.merge(X_info, X_candidates_tmp, on='tmp_key')
    X_candidates.drop(['tmp_key'], axis=1, inplace=True)

    index_of_actual_duplicate = X_candidates[
        X_candidates['duplicate_of_id'] == X[1]['duplicate_of_id']].index[0]
    X_candidates_distances = self.calculate_distances(X_candidates,
                                                      train=False)
    # probability of the positive (duplicate) class for every candidate
    probas = self.model.predict_proba(X_candidates_distances)[
        :, int(self.model.classes_[np.argmax(self.model.classes_)])]
    # rank candidates by descending probability, locate the true duplicate
    pos_of_actual_duplicate = np.argsort(
        probas)[::-1].tolist().index(index_of_actual_duplicate)
    cur.execute("""
        INSERT INTO duplicate_eval3 VALUES (%s, %s, %s)
    """, [X[1]['id'], pos_of_actual_duplicate, len(probas)])
    conn.commit()
    conn.close()
    print '{}: positioned as {} of {}'.format(datetime.datetime.now(),
                                              pos_of_actual_duplicate,
                                              len(probas))
    return (pos_of_actual_duplicate, len(probas))
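# Side note on the constant-key trick above: pandas 1.2+ supports a native
# cross join, which states the intent directly and skips the tmp_key column:
#
#     X_candidates = pd.merge(X_info, X_candidates_tmp, how='cross')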
def getUserName():
    userId = session.get('userid', None)
    if userId is None:
        return None  # no user in session; concatenating None would raise
    conn = util.connect_db(db_connect_command)
    cursor = conn.cursor()
    sql_query = "select lastName from ulist where userId = %s"
    cursor.execute(sql_query, (userId,))
    result = cursor.fetchall()
    conn.close()
    # return the bare last name rather than the repr of the row list
    return str(result[0][0]) if result else None
def GET(self):
    i = web.input(page='1')
    page = int(i.page) if int(i.page) > 0 else 1
    db = util.connect_db()
    nr_records = list(db.select(config.table, what='count(1) as count'))[0].count
    # ceiling division: e.g. 21 records at 10 per page gives 3 pages
    pages = (nr_records + config.nr_items - 1) / config.nr_items
    offset = (page - 1) * config.nr_items
    rows = list(db.select(config.table, order='gid desc',
                          offset=offset, limit=config.nr_items))
    remain = util.get_remain(db)
    del db
    return render.list(rows, page, pages, config.fields, remain)
def test_select(self):
    conn = connect_db()
    id = 0
    # bind the id as a parameter instead of %-formatting it into the SQL
    cursor = conn.execute('select status from QSBK_AUTHOR where id = ?',
                          (id,))
    print(cursor)
    # print(cursor.arraysize)
    for row in cursor:
        status = row[0]
        assert status is None
        print(status)
    close_db(conn)
def store_anonymous_jokes(jokes):
    results = []
    cur_time = int(time.time())
    for joke in jokes:
        results.append(
            (joke.md5_content, 0, joke.id, joke.author, joke.num_likes,
             joke.content.decode('utf-8'), u'qiushibaike', cur_time))
    conn = connect_db()
    # number of distinct content hashes in this batch
    print len(set([r[0] for r in results]))
    conn.executemany('REPLACE INTO JOKE VALUES (?,?,?,?,?,?,?,?)', results)
    conn.commit()
    close_db(conn)
def POST(self):
    i = util.filter_readonly(web.input())
    db = util.connect_db()
    trans = db.transaction()
    try:
        util.change_remain(db, i['direction'], i['amount'], cancel=False)
        i['remain'] = util.get_remain(db)
        gid = db.insert(config.table, **i)
    except Exception as e:
        trans.rollback()
        logging.warn('insert failed: %s\n%s', str(e), str(i))
        return render.msg('insert failed')
    else:
        trans.commit()  # the transaction is only persisted on success
def GET(self, gid):
    web.header('Cache-Control', 'no-cache')
    db = util.connect_db()
    trans = db.transaction()
    try:
        # restore the balance before removing the record
        orig = util.select_one(db, gid)
        util.change_remain(db, orig['direction'], orig['amount'], cancel=True)
        db.delete(config.table, where='gid=%d' % int(gid))
    except Exception as e:
        trans.rollback()
        # log gid here: `i` does not exist in this handler
        logging.warn('remove failed: %s\n%s', str(e), str(gid))
        return render.msg('remove failed')
    else:
        trans.commit()
def POST(self):
    param = web.input(return_url='/')
    if not hasattr(param, 'username') or not hasattr(param, 'password'):
        raise web.seeother('/')
    db = util.connect_db()
    # the password placeholder was scrubbed in the source; '%s' restored
    # below to match the two-value tuple being formatted in
    check = db.query(
        "SELECT * FROM t_account WHERE accname = '%s' and accpass = '%s'"
        % (param.username, param.password))
    if len(check) == 1:
        #web.setcookie('test', 'cookice_test', 60)
        session.loginned = True
        session.username = param.username
        raise web.seeother('/classlost')
    else:
        raise web.seeother('/?err=passerr')
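# A safer variant of the credential check above, assuming web.py's db.query
# vars binding ($name placeholders are escaped by the driver, so a crafted
# username cannot alter the SQL):
#
#     check = list(db.query(
#         "SELECT * FROM t_account WHERE accname = $username"
#         " AND accpass = $password",
#         vars={'username': param.username, 'password': param.password}))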
def verify2(medium, uname):
    try:
        medium = medium.lower()
        conn = util.connect_db()
        # the username literal was scrubbed in the source; bind both values
        # as parameters instead of formatting them into the SQL
        query = "SELECT username FROM usernames WHERE username=? AND medium=?"
        cur = conn.cursor()
        cur.execute(query, (uname, medium))
        rows = cur.fetchall()  # fetchone() would do: one row settles it
        # guard against an empty result before indexing
        verified = bool(rows) and rows[0][0] == uname
        return jsonify({"verified": verified})
    except Exception as e:
        return str(e)
def add_rating(cond, kind, rater_id, happy, sad, likable, interesting,
               one_word, description, encoded_url, table="photo_ratings"):
    '''
    Called from Qualtrics after a user has rated a photo
    - Writes ratings data to photo_ratings db
    - Increments ratings_ct in meta_ig for that URL
    '''
    tstamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    target_table = "_".join([table, cond, kind])
    '''
    The reason for the substitution on url below is that Flask parses the
    forward slash (/) (as well as the encoded %2F) before we can even access
    incoming parameter strings programmatically. Since we're passing it a
    url, it barfs when it discovers slashes in the parameter. So we converted
    / to _____ (5 underscores) on the Qualtrics side, and we back-translate
    here. 5 underscores may be overkill, but _ or even __ is not out of the
    question in a url...just being safe.
    '''
    try:
        url = unquote(encoded_url).replace("_____", "/")
        valid_ratings = False if description == "_" else True
        fields = ['rater_id', 'happy', 'sad', 'likable', 'interesting',
                  'one_word', 'description']
        values = map(unquote, [rater_id, happy, sad, likable, interesting,
                               one_word, description])
        log_dir = 'ratings/{rid}__{t}.log'.format(rid=values[0], t=tstamp)
        log_msgs = []
        log_msgs.append('\nStarting add_rating for photo url {} '
                        '[rater id: {}]'.format(url, values[0]))
        conn = util.connect_db()
        if valid_ratings:
            with conn:
                query1 = ("UPDATE meta_ig SET ratings_ct=ratings_ct+1 "
                          "WHERE url='{}'".format(url))
                cur = conn.cursor()
                cur.execute(query1)
                try:
                    query2 = ('insert into {}(url, rater_id, happy, sad, '
                              'likable, interesting, one_word, description) '
                              'values (?,?,?,?,?,?,?,?)').format(target_table)
                    qvals = (url,) + tuple(values)
                    cur.execute(query2, qvals)
                except Exception as e:
                    return query2 + "__" + str(e)
            conn.commit()
            log_msgs.append('\nRating for url: {} [rater id: {}] stored '
                            'successfully!'.format(url, values[0]))
            util.log(log_msgs, log_dir, full_path_included=True)
            return query2
        else:
            # assumed handling for invalid ratings (this branch is truncated
            # in the source): log the skip and report that nothing was stored
            log_msgs.append('\nRating for url: {} [rater id: {}] marked '
                            'invalid; nothing stored.'.format(url, values[0]))
            util.log(log_msgs, log_dir, full_path_included=True)
            return 'invalid rating'
def leaderboards():
    conn = util.connect_db(db_connect_command)
    cursor = conn.cursor()
    try:
        cursor.execute("SELECT * FROM finishes ORDER BY score ASC LIMIT 6")
        rows = cursor.fetchall()
        conn.close()
        return render_template("leaderboards.html", result=rows)
    except psycopg2.Error as e:
        conn.rollback()
        conn.close()
        print(errorcodes.lookup(e.pgcode))
        # the two-character prefix of pgcode names the error class
        return errorcodes.lookup(e.pgcode[:2])
def execute_sql(name, score):
    conn = util.connect_db(db_connect_command)
    cursor = conn.cursor()
    try:
        cursor.execute("INSERT INTO finishes (score, name) VALUES (%s, %s);",
                       (score, name))
        conn.commit()
        conn.close()
        print("t2")
        return render_template("restart.html")
    except psycopg2.Error as e:
        conn.rollback()
        conn.close()
        print(errorcodes.lookup(e.pgcode))
        return errorcodes.lookup(e.pgcode[:2])
def index():
    conn = util.connect_db(db_connect_command)
    if conn == 0:
        info = 'error'
    else:
        cursor = conn.cursor()
        try:
            sql_command = 'select * from student;'
            cursor.execute(sql_command)
            info = cursor.fetchall()
            conn.close()
        except psycopg2.Error as e:
            conn.close()
            info = errorcodes.lookup(e.pgcode[:2])
    return render_template('index.html', info=info)
def POST(self, gid):
    i = util.filter_readonly(web.input())
    db = util.connect_db()
    trans = db.transaction()
    try:
        # restore the balance recorded for the original entry
        orig = util.select_one(db, gid)
        util.change_remain(db, orig['direction'], orig['amount'], cancel=True)
        # apply the updated balance
        util.change_remain(db, i['direction'], i['amount'], cancel=False)
        i['remain'] = util.get_remain(db)
        db.update(config.table, where='gid=%d' % int(gid), **i)
    except Exception as e:
        trans.rollback()
        logging.warn('edit failed: %s\n%s', str(e), str(i))
        return render.msg('edit failed')
    else:
        trans.commit()
def generate_non_duplicates(self, X_dupl):
    """Generates non-duplicate records for given bug reports.

    Args:
        X_dupl: Bug reports that are duplicates.

    Returns:
        dataframe: To each observation on the input attach info about 7
            non-duplicates.
    """
    conn = connect_db()
    df_all_ids = pd.read_sql_query('''
        SELECT id, short_desc_init, desc_init, product_init,
               component_init, reporter, op_sys_init
        FROM final
        WHERE product_init='firefox'
        ''', con=conn)
    conn.close()

    duplicates = set(zip(X_dupl['id'], X_dupl['duplicate_of_id']))
    X_non_dupl = []
    for rownum, row in X_dupl.iterrows():
        for i in xrange(7):
            # rejection sampling: redraw until the sampled report is not a
            # known duplicate of the current one (in either direction);
            # rand_ix replaces the original's shadowing of the loop variable
            while True:
                rand_ix = random.randint(0, df_all_ids.shape[0] - 1)
                r = df_all_ids.iloc[rand_ix]
                if ((row['id'], r['id']) not in duplicates and
                        (r['id'], row['id']) not in duplicates):
                    break
            X_non_dupl.append([
                row['id'],
                r['id'],
                row['short_desc_init'],
                row['desc_init'],
                row['product_init'],
                row['component_init'],
                row['reporter'],
                row['op_sys_init'],
                r['short_desc_init'],
                r['desc_init'],
                r['product_init'],
                r['component_init'],
                r['reporter'],
                r['op_sys_init'],
            ])
    X_non_duplicates = pd.DataFrame(X_non_dupl)
    X_non_duplicates.columns = X_dupl.columns
    return X_non_duplicates
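# Hypothetical usage sketch for assembling a training frame with the 7:1
# negative-to-positive ratio this method produces (the `pipeline` instance
# and the label column name are assumptions, not taken from this module):
#
#     X_dupl['is_duplicate'] = 1
#     X_non_dupl = pipeline.generate_non_duplicates(X_dupl)
#     X_non_dupl['is_duplicate'] = 0
#     X_train = pd.concat([X_dupl, X_non_dupl], ignore_index=True)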
def get_photo(kind, cond):
    try:
        conn = util.connect_db()
        max_ratings_ct = 3
        sample_size = 20
        query = 'select * from photo_ratings_{}_{}'.format(cond, kind)
        db = pd.read_sql_query(query, conn)
        gb = db.groupby('url').count()
        # we use 'rater_id' because we need to pick a column that keeps
        # track of the groupby count. happy, sad, etc. also work
        urls_to_rate = gb.ix[gb.rater_id < max_ratings_ct,
                             'rater_id'].sample(sample_size).index.tolist()
        urls = {}
        for i, row in enumerate(urls_to_rate):
            urls["url" + str(i)] = row
        return jsonify(urls)
    except Exception as e:
        return jsonify({"url0": str(e)})
def execute_sql_display(sql_query=''):
    '''
    Runs a SELECT-style query and returns the fetched results.
    '''
    conn = util.connect_db(db_connect_command)
    cursor = conn.cursor()
    try:
        cursor.execute(sql_query)
        result = cursor.fetchall()
        # conn.commit()
        conn.close()
        return str(result)
    except psycopg2.Error as e:
        conn.rollback()
        conn.close()
        return errorcodes.lookup(e.pgcode[:2])
def execute_sql(sql_query=''):
    '''
    Executes a mutating query for POST requests and commits it.
    TODO: return results as well.
    '''
    # example: sql_query = 'select * from student;'
    conn = util.connect_db(db_connect_command)
    cursor = conn.cursor()
    try:
        cursor.execute(sql_query)
        conn.commit()
        conn.close()
        return 'Query has been Done.'
    except psycopg2.Error as e:
        conn.rollback()
        conn.close()
        return errorcodes.lookup(e.pgcode[:2])
def evaluate(self, X_test):
    """Evaluates the model in parallel, stores result in db."""
    conn = connect_db()
    cur = conn.cursor()
    cur.execute("""
        CREATE TABLE IF NOT EXISTS duplicate_eval (
            id bigint NOT NULL,
            dupl_pos int,
            candidates int
        );
    """)
    conn.commit()
    conn.close()
    # pathos' ProcessingPool can map a bound method across workers, which
    # the stdlib multiprocessing pickler cannot; note that eval_single_dupl
    # inserts into duplicate_eval3, so the two table names must be kept in
    # sync
    results = ProcessingPool().map(self.eval_single_dupl, X_test.iterrows())
    return results
def GET(self):
    if not session.get('loginned', False):
        raise web.seeother('/')
    param = web.input()
    if not hasattr(param, 'classid'):
        return render.Tclslost(None)
    if not param.classid.isdigit():
        return render.Tclslost(None)
    db = util.connect_db()
    # classid is validated as all-digits above, so interpolating it is safe
    dbitems = db.query("SELECT * FROM t_gc_packetlost WHERE classid = %s "
                       "ORDER BY usrdbid ASC, stream, recordtime"
                       % param.classid)
    cl = ClassLostCollect(param.classid)
    cl.initgc(dbitems)
    dbitems = db.query("SELECT * FROM t_gg_packetlost WHERE classid = %s "
                       "ORDER BY mg_sour ASC, recordtime" % param.classid)
    cl.initgg(dbitems)
    dbitems = db.query("SELECT * FROM t_disconnect WHERE classid = %s "
                       "ORDER BY usrdbid ASC, recordtime" % param.classid)
    cl.initdis(dbitems)
    return render.Tclslost(cl)
def get_auth(medium, username):
    try:
        conn = util.connect_db()
        callback = acquire_url_base + '?medium={}&username={}'.format(
            medium, username)
        tokens = util.get_tokens(conn, medium)
        if medium == "twitter":
            session['APP_KEY'] = tokens[0]
            session['APP_SECRET'] = tokens[1]
            twitter = Twython(session['APP_KEY'], session['APP_SECRET'])
            auth = twitter.get_authentication_tokens(callback_url=callback)
            session['OAUTH_TOKEN'] = auth['oauth_token']
            session['OAUTH_TOKEN_SECRET'] = auth['oauth_token_secret']
            return redirect(auth['auth_url'])
        elif medium == "instagram":
            CONFIG = {
                'client_id': tokens[2],
                'client_secret': tokens[3],
                'redirect_uri': callback
            }
            api = InstagramAPI(**CONFIG)
            session['APP_KEY'] = tokens[2]
            session['APP_SECRET'] = tokens[3]
            url = api.get_authorize_url(scope=["basic"])
            return redirect(url)
    except Exception as e:
        return str(e)
def correctCredentials(userName, Password):
    print("start check")
    conn = util.connect_db(db_connect_command)
    cursor = conn.cursor()
    print("userName " + userName)
    # parameterized: the original concatenated both values into the SQL
    # string, and also printed the plaintext password, which is dropped here
    sql_query = "select userId from ulist where userid = %s and pwd = %s"
    cursor.execute(sql_query, (userName, Password))
    rows = cursor.fetchall()
    conn.close()
    if rows:
        session['userid'] = str(rows[0][0])
        print("true")
        return True
    else:
        print("false")
        return False
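# A minimal hardening sketch, assuming passwords were stored hashed with
# werkzeug.security.generate_password_hash (an assumption; ulist.pwd holds
# plaintext as this app is written):
#
#     from werkzeug.security import check_password_hash
#
#     cursor.execute("select userId, pwd from ulist where userid = %s",
#                    (userName,))
#     row = cursor.fetchone()
#     ok = row is not None and check_password_hash(row[1], Password)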
from util import load_config, connect_db
from metrics import graphs

if __name__ == '__main__':
    config = load_config()
    conn = connect_db(config)
    graphs.average_distinct_partners(conn)
def analyze_feature_set(args, connection, feature_names):
    """Analyzes and plots various standard metrics for each feature."""
    for name in feature_names:
        values = util.get_feature_values(connection.cursor(), name)
        stats.analyze_feature(connection, name, values)
        if args.visualize:
            visualize.plot_feature(connection, name, values)

if __name__ == '__main__':
    from util import load_config, connect_db
    CONFIG = load_config()
    connection = connect_db(CONFIG)
    model_feature_names = map(lambda f: f.__name__, MODEL_FEATURES)
    cluster_feature_names = map(lambda f: f.__name__, CLUSTER_FEATURES)
    algorithm_names = map(lambda a: a.__name__, clustering.ENABLED_ALGORITHMS)
    timespan = lambda s: tuple(map(int, s.split(',', 1)))

    import argparse
    parser = argparse.ArgumentParser(description="Parses sentences.")
    parser.add_argument('-b', '--batch-size', help="batch size",
                        type=int, default=500)
    parser.add_argument('-t', '--timespan',
                        help="time span to use: from,to in seconds since unix epoch",
                        default=(None, None), type=timespan)
    parser.add_argument('--reset', help="clear features before rebuilding",
                        action='store_true', default=False)
    parser.add_argument('--create',
                        help="attempt to create user model table before starting",
                        action='store_true', default=False)
    parser.add_argument('--analyze', help="analyze features",
                        action='store_true', default=False)
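# The entry point above is cut off after the flag definitions. A minimal
# assumed continuation (hypothetical; the real dispatch logic is not in the
# snippet) would parse the flags and route to the analyzer:
#
#     args = parser.parse_args()
#     if args.analyze:
#         analyze_feature_set(args, connection,
#                             model_feature_names + cluster_feature_names)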
        print 'Commit complete for {} [TABLE: {}, COND: {}, DTYPE: {}]'.format(
            uname, table_name, condition, date_type)
    except Exception as e:
        print 'Error in writing count days for condition: {} | turning point: {} [ERROR: {}]'.format(
            condition, date_type, str(e))

def count_days_from_turning_point_wrapper(conn):
    '''
    Counts the number of days from turning point (either diagnosis or
    suspected date) for each social media post, for a given user and a given
    condition.
    - Values of counts are +/- integers (-X = X days before turning point,
      +X = X days after turning point)
    - Count fields are named with the format: d_from_{turning_point}_{condition},
      eg. d_from_diag_pregnancy
    - Attempts count for all rows in meta_ig/meta_tw which lack a count in
      all conditions (so might be more rows than the most recent batch of
      survey respondents)
    '''
    conditions = ['pregnancy', 'cancer', 'ptsd', 'depression']
    for condition in conditions:
        for medium in ['tw', 'ig']:
            count_days_from_turning_point(conn, condition, medium)

if __name__ == '__main__':
    control_collection = False
    conn = util.connect_db()
    conn.text_factory = str
    add_survey_data(conn, control=control_collection)
    collect(conn)
    if not control_collection:
        add_monthnum(conn)
        count_days_from_turning_point_wrapper(conn)
from __future__ import division
import pandas as pd
import numpy as np
import datetime
import pickle
import random
from util import connect_db
from sklearn.cross_validation import train_test_split
from duplicate_pipeline import DuplicatePipeline

if __name__ == '__main__':
    conn = connect_db()
    print '{}: getting data'.format(datetime.datetime.now())
    query = '''
        SELECT d.id
             , d.duplicate_of_id
             , f.short_desc_init
             , f.desc_init
             , f.product_init
             , f.component_init
             , f.reporter
             , f.op_sys_init
             , dof.short_desc_init AS dof_short_desc_init
             , dof.desc_init AS dof_desc_init
             , dof.product_init AS dof_product_init
             , dof.component_init AS dof_component_init
             , dof.reporter AS dof_reporter
             , dof.op_sys_init AS dof_op_sys_init
from flask import Flask, request, render_template, url_for, redirect, flash
import sys, os, getpass
import util

util.cd_script_path()
sys.path.append("../requetes")
from listeConsommationsMensuelles import ListeConsommationsMensuelles
from listeEquipements import ListeEquipements
from listeCentrales import ListeCentrales
from listeAbonnes import ListeAbonnes
from listeVilles import ListeVilles
from listeBris import ListeBris
from requetes import Requetes

app = Flask(__name__, template_folder=util.get_templates_path())
req = util.connect_db(Requetes)
util.define_admin_password(req)

"""
Brief: This container passes data between the different pages and avoids
needlessly duplicating queries against the database.
"""
cache = {}

"""
Brief: This function renders the root of the website. Each of its buttons
triggers another associated function. The radio button sets the display
order of the outages. This function writes data into the global cache
container. The rendered page contains three buttons:
    liste_villes: loads the listeVilles.html page
    liste_centrales: loads the listeCentrales.html page
            OAUTH_TOKEN = access_token
            api = InstagramAPI(access_token=access_token,
                               client_secret=CONFIG['client_secret'])
            userid = user_info['id']
            username = user_info['username']
            post_ct = api.user().counts['media']
        else:
            return "Uhoh no code provided"
    except Exception as e:
        return "Error in acquire step 1: " + str(e)
    try:
        if username == alleged_user:
            unique_id = np.random.randint(1e10)
            conn = util.connect_db()
            if medium == "twitter":
                register_user(medium, userid, username, unique_id, post_ct,
                              conn, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
            elif medium == "instagram":
                register_user(medium, userid, username, unique_id, post_ct,
                              conn, OAUTH_TOKEN, '')
            return ('<span style="font-size:24pt;color:green;">'
                    'USERNAME {} CONFIRMED!</span>').format(username)
        else:
            return ('The username you just used to grant access to this app '
                    '(<b>{actual}</b>) is not the same username you provided '
                    'in the study survey (<b>{alleged}</b>). <br />Please go '
                    'back to <a href="{oauth}">the app authorization page</a> '
                    'and make sure you are logged in as the correct user, and '
                    'try again. <br />(You may need to log out of your '
                    'account first in a separate window.)').format(
                        actual=username, alleged=alleged_user, oauth=oauth_url)
    except Exception as e:
        return "Error in acquire step 2:" + str(e)
except Exception as e:
    return 'There was an error, please go back to {} and retry. [ERROR: {}]'  # .format(oauth_url, str(e))
import util
from export import neo4j

def get_all_relationships(cursor):
    cursor.execute('SELECT * FROM user_graph')
    return cursor.fetchall()

def run(connection):
    print "Fetching all relationships"
    relationships = get_all_relationships(connection.cursor())
    print " - fetched %d relationships" % len(relationships)
    print "Exporting to neo4j"
    neo4j.export(relationships)
    print "\n - done"

if __name__ == '__main__':
    connection = util.connect_db(util.load_config())
    run(connection)