Example 1
def process_water_meter(data):
	series=[]
	for param in data['parameters']:
		query='select timestamp,state from water_data where timestamp between %d and %d and meter_id= %d;' %(data['start'],data['end'],int(param))
		try:
			result=psql.frame_query(query,mysql_conn['water_meter'])
		except Exception, e:
			mysql_conn['smart_meter']=MySQLdb.connect(user='******',passwd='password',db='smart_meter');
			mysql_conn['jplug']=MySQLdb.connect(user='******',passwd='password',db='jplug');
			mysql_conn['water_meter']=MySQLdb.connect(user='******',passwd='password',db='water_meter');
			mysql_conn['multisensor']=MySQLdb.connect(user='******',passwd='password',db='multisensor')
			result=psql.frame_query(query,mysql_conn['water_meter'])
		
		result.index=pd.to_datetime(result.timestamp*1e9)
		result=result.drop('timestamp',1)
		freq_downsampled=calculate_downsampling_frequency(result)
		if freq_downsampled is not None:
			result=result.resample(freq_downsampled, how='max')
			result=result.dropna()
		num_rows=len(result[result.columns[0]].values)
		temp=np.empty((num_rows,2))
		# Shift the epoch-ms timestamps by the 5.5 hr (IST) offset
		x=result.index.astype(int)/1e6+5.5*60*60*1000
		temp[:,0]=x
		for key in result:
			temp[:,1]=result[key].values    
			series.append({'name':key+" "+water_meter_mapping[param],'data':temp.tolist()})
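Note: psql.frame_query is the legacy pandas.io.sql API; in current pandas the equivalent call is pandas.read_sql_query, which also accepts a params argument so values need not be interpolated into the SQL string. A minimal sketch of the query above under that API (assuming an open MySQLdb connection conn and start, end and meter_id already defined):

import pandas as pd

# MySQLdb uses %s placeholders; the driver quotes and escapes the bound values
query = ("SELECT timestamp, state FROM water_data "
         "WHERE timestamp BETWEEN %s AND %s AND meter_id = %s")
result = pd.read_sql_query(query, conn, params=(start, end, meter_id))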
Example 2
    def remeasureFeatFlagDB(self):

        conn = conDB(self.host,self.dbname,passwd=self.passwd,port=self.port, user=self.user)
        import pandas.io.sql as psql
        pageIds = psql.frame_query("SELECT pageid FROM testing2 WHERE featured = 1 OR flagged = 1", conn)

        # go through each page, rescrape features and write out readability to database
        print("Calculating readability & scores & writing to database...")
        for i in range(len(pageIds.values)):
            p = pageIds.values[i]
            sql = "SELECT * FROM testing2 WHERE pageid = %s" % p[0]
            featuresDF = psql.frame_query(sql, conn)
            print i, p[0], featuresDF['reading_ease'][0]
            new_row = self.getWikiPageMeta(title=featuresDF['title'][0], pageid=p[0])
            print new_row

            try:
                sql = "UPDATE testing2 SET grade_level = %s WHERE pageId = %s" % (str(new_row['grade_level']),str(p[0]))
                curDB(conn).execute(sql)
                sql = "UPDATE testing2 SET reading_ease = %s WHERE pageId = %s" % (str(new_row['reading_ease']),str(p[0]))
                curDB(conn).execute(sql)
                conn.commit()
            except:
                print "Skipping for testing2:  "
                pass            
                        
        closeDB(conn)
Example 3
def process_smart_meter(data):
	param_string=""
	for param in data['parameters']:
		param_string=param_string+param+","
	query='select timestamp,'+param_string
	query=query[:-1]+' from %s_data where timestamp between %d and %d;' %(data['sensor'],data['start'],data['end'])
	print query
	try:
		result=psql.frame_query(query,mysql_conn[data['sensor']])
	except:
		mysql_conn['smart_meter']=MySQLdb.connect(user='******',passwd='password',db='smart_meter');
		mysql_conn['jplug']=MySQLdb.connect(user='******',passwd='password',db='jplug');
		mysql_conn['water_meter']=MySQLdb.connect(user='******',passwd='password',db='water_meter');
		mysql_conn['multisensor']=MySQLdb.connect(user='******',passwd='password',db='multisensor');
		result=psql.frame_query(query,mysql_conn[data['sensor']])
	result.index=pd.to_datetime(result.timestamp*1e9)
	result=result.drop('timestamp',1)
	freq_downsampled=calculate_downsampling_frequency(result)
	if freq_downsampled is not None:
		result=result.resample(freq_downsampled, how='mean')
		result=result.dropna()
	series=[]
	num_rows=len(result[result.columns[0]].values)
	temp=np.empty((num_rows,2))
	# Shift the epoch-ms timestamps by the 5.5 hr (IST) offset
	x=result.index.astype(int)/1e6+5.5*60*60*1000
	temp[:,0]=x
	for key in result:
		temp[:,1]=result[key].values    
		series.append({'name':key,'data':temp.tolist()})
	return json.dumps(series)
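For reference, the serialized payload is a list of one series object per selected column, with each data point an [epoch-milliseconds, value] pair (values below are illustrative):

[{"name": "power", "data": [[1388534400000, 230.5], [1388534430000, 231.1]]}]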
Example 4
def get_geonet_ftec_data_sv(sv, tstart, tstop):
    '''
    Query the GEONET database for filtered tec data from all receivers in a certain time span
    for a given satellite.
    INPUTS:
        sv - integer, satellite PRN id
        tstart - datetime, start of time window
        tstop  - datetime, end of time window
    OUTPUTS:
        frame - an object containing the desired GPS data:
                rxID, time [POSIX], svPRN, slant TEC [tecu], filtered TEC [tecu], el [deg], and az [deg]
    '''

    # Open a connection to the database
    con = mdb.connect(host=hostname,user='******',passwd='bdpass',db='gpsdatabase')

    # This is the UT POSIX time for the start/stop times of interest
    startUT = calendar.timegm(tstart.timetuple())
    stopUT =  calendar.timegm(tstop.timetuple())

    # First find out if the entry is in there (i.e., we are just updating the png and avi file)
    sql_cmd = 'SELECT rxID, UT, svPRN, sTEC, el, az ' + \
              'FROM tecdata WHERE svPRN = %d AND UT >= %d AND UT <= %d' % (sv, startUT, stopUT)
    frame1 = psql.frame_query(sql_cmd, con=con)
    
    # Get the filtered TEC data
    sql_cmd = 'SELECT rxID, UT, svPRN, fTEC FROM ftecdata WHERE svPRN = %d AND UT >= %d and UT <= %d' %(sv, startUT, stopUT)
    frame2 = psql.frame_query(sql_cmd, con=con)

    # Join
    frame = pd.merge(frame1,frame2,how='inner',on=['UT','rxID','svPRN'])

    con.close()

    return frame
Example 5
def get_rx_ftec_data(rxID, tstart, tstop):
    '''
    Query the GEONET database for filtered tec data from a receiver in a certain time span.
    INPUTS:
        rxID - integer, receiver ID
        tstart - datetime, start of time window
        tstop  - datetime, end of time window
    OUTPUTS:
        frame - an object containing the desired GPS data:
                rxID, time [POSIX], svPRN, filtered TEC [tecu], el [deg], and az [deg]
    '''

    # Open a connection to the database
    con = mdb.connect(host=hostname,user='******',passwd='bdpass',db='gpsdatabase')

    # This is the UT POSIX time for the start/stop times of interest
    startUT = calendar.timegm(tstart.timetuple())
    stopUT =  calendar.timegm(tstop.timetuple())

    # Grab the slant TEC data (need the el/az from this)
    sql_cmd = 'SELECT rxID, UT, svPRN, sTEC, el, az ' + \
              'FROM tecdata WHERE rxID = %s AND UT >= %d AND UT <= %d' % (rxID, startUT, stopUT)
    frame1 = psql.frame_query(sql_cmd, con=con)
    
    # Grab the filtered TEC
    sql_cmd = 'SELECT rxID, UT, svPRN, fTEC ' +\
              'FROM ftecdata where rxID = %s and UT >= %d AND UT <= %d' % (rxID, startUT, stopUT)
    frame2 = psql.frame_query(sql_cmd, con=con)

    con.close()

    # Join the two frames
    frame = pd.merge(frame1, frame2, how='inner', on=['UT', 'rxID', 'svPRN'])

    return frame
Example 6
def get_geonet_ftec_data_at_time(t):
    '''
    Query the GEONET filtered TEC database for data for all receivers at a certain time.
    INPUTS:
        t - datetime, desired sample time
    OUTPUTS:
        frame - an object containing the desired GPS data:
                rxID, time [POSIX], svPRN, slant TEC [tecu], filtered TEC [tecu], el [deg], and az [deg]
    '''
    # Open a connection to the database
    con = mdb.connect(host=hostname,user='******',passwd='bdpass',db='gpsdatabase')

    # This is the UT POSIX time for the time of interest
    UT = calendar.timegm(t.timetuple())

    # First find out if the entry is in there (i.e., we are just updating the png and avi file)
    sql_cmd = 'SELECT rxID, UT, svPRN, sTEC, el, az FROM tecdata WHERE UT = %d' \
              % (UT)
    frame1 = psql.frame_query(sql_cmd, con=con)

    sql_cmd = 'SELECT rxID, UT, svPRN, fTEC FROM ftecdata WHERE UT = %d' % (UT)
    frame2 = psql.frame_query(sql_cmd, con=con)

    # Join
    frame = pd.merge(frame1,frame2,how='inner',on=['UT','rxID','svPRN'])

    con.close()

    return frame
Example 7
    def setUpData(self):
    
        debug, host, port, user, passwd, dbname, localpath = grabDatabaseSettingsFromCfgFile(configFileName)
        self.qualityPredictorFile = localpath + 'app/qualityPredictorFile.p'
        conn = conDB(host,dbname,passwd,port, user)	
        #featured_csvfilename = '/Users/ahna/Documents/Work/insightdatascience/project/wikiphilia/webapp/datasets/featured.csv'
        #flagged_csvfilename = '/Users/ahna/Documents/Work/insightdatascience/project/wikiphilia/webapp/datasets/flagged.csv'

        # load data from database
        import pandas.io.sql as psql
        DF = psql.frame_query("SELECT * FROM testing2 WHERE flagged = 1 OR featured = 1", conn)        
        DF = DF[~np.isnan(DF.meanWordLength)]   # remove rows with NaN in meanWordLength
        DF = DF[~np.isnan(DF.reading_ease)]     # remove rows with NaN in reading_ease  
        X = DF[self.iUseFeatures].values         #.astype('float')
        
        if self.bUsingBuckets: 
            maxes = list()
            for i in range(len(self.iUseFeatures)):
                sql = "SELECT AVG({}), STD({}) FROM testing2 WHERE featured = 1".format(self.iUseFeatures[i],self.iUseFeatures[i])
                temp = psql.frame_query(sql, conn)
                maxes.append(sum(temp.ix[0])) # add up average + standard dev of featured pages to be used for the maximum bucket value
            self.computeBuckets(X,maxes)                  # for some of the features, compute buckets so as not to use raw values but use "low","med","high" or more values
            X = self.getAllBuckets(X)               # put some of the features into buckets
            
        y = DF['score'].values                  # labels are 1 if featured is True, 0 if flagged is True
        np.random.seed()                        # divide into training set and test set
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(\
        X, y, test_size=self.testPropor, train_size=1-self.testPropor, random_state=np.random.random_integers(100))
        closeDB(conn)
Example 8
    def executa_query(self, query, get_data_frame=False):
        """ Executes a SQL query.

            @brief Local variable:

                dados -
                    Receives the query result

            @param query A SQL instruction
            @param get_data_frame   Determines the format of the query result.
                Defaults to False
            @return The query result
        """

        dados = None

        if (get_data_frame is False):
            dados = psql.frame_query(
                query,
                con=self.conexao
            ).to_dict()
        else:
            dados = psql.frame_query(
                query,
                con=self.conexao
            )

        return dados
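A hypothetical call illustrating the two return shapes controlled by get_data_frame (dao and tabela are placeholder names for an instance with an open conexao and one of its tables):

registros = dao.executa_query("SELECT * FROM tabela")                       # dict of columns
frame = dao.executa_query("SELECT * FROM tabela", get_data_frame=True)      # pandas DataFrame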
Example 9
def read_GrpAttrs(sFileDB, nStartFrm, nEndFrm):
    conn = lite.connect(sFileDB)
    sRange = "Frame>=" + str(nStartFrm) + " and Frame<=" + str(nEndFrm)
    dfIndAttrs = psql.frame_query("SELECT * FROM IndAttrs WHERE " + sRange,
                                  conn)
    dfGrp_Attrs = psql.frame_query("SELECT * FROM GrpAttrs WHERE " + sRange,
                                   conn)
    return dfIndAttrs, dfGrp_Attrs
Example 10
def event_aggregation(users):
    # List of events
    json_node_events=[]
    # connection to the local database
    conexion=db.mySQLConect("localhost","root","lego","dashboard")
    # events + reports
    query="SELECT dashboard.reports.id as report_id,dashboard.timeline_events.id as event_id, dashboard.reports.node_id,dashboard.reports.status, dashboard.reports.time FROM dashboard.reports,dashboard.timeline_events where timestampdiff(MINUTE,dashboard.timeline_events.updated_at,dashboard.reports.time)<=3 AND  dashboard.reports.time>=dashboard.timeline_events.updated_at AND node_id=subject_id AND event_type='updated';"
    er=psql.frame_query(query,conexion.db)
    # For every node that has an associated hostname
    for idx,u in users[users.host.notnull()].iterrows():
        # fields used to fill in each record
        nid=u['code']
        user=u['user']
        host=u['host']
        course=u['course']
        # look up the events related to this autoes code in the database
        query="SELECT * FROM timeline_events WHERE (subject_id ="+str(nid)+" AND subject_type='Node') OR (secondary_subject_id="+str(nid)+");"
        node_events=psql.frame_query(query,conexion.db)
        # a student always starts at practice 1
        current_practice=1
        # for each event found
        for idx,event in node_events.iterrows():
            # autoes user creation event
            if event['event_type']=="created":
                register_event={ "user":user, "course":course,"host":host,"type":"created","report":None,"practice":current_practice, "date":event['created_at']}
                json_node_events.append(register_event)
            # event for moving to another practice
            elif event['event_type']=="added_to":
                # if the student advances exactly one practice
                if(current_practice+1==event['subject_id']):
                    register_event={ "user":user,"course":course,"host":host, "type":"added_to","report":None,"practice":event['subject_id'], "date":event['created_at']}
                    current_practice=event['subject_id']
                    json_node_events.append(register_event)
                # if several practices are advanced at once
                else:
                    for i in range(current_practice,event['subject_id']+1):
                        register_event={ "user":user,"course":course,"host":host, "type":"added_to","report":None,"practice":i, "date":event['created_at']}
                        json_node_events.append(register_event)
                    current_practice=event['subject_id']
            # "updated" event: either a login, a successful evaluation, or a failed one
            elif event['event_type']=="updated":
                # look up the report
                report_event=er[er.event_id==event['id']]
                # for each candidate report
                for idx2,report in report_event.iterrows():
                    register_event={ "user":user,"course":course,"host":host, "type":report['status'],"report":report['report_id'],"practice":current_practice, "date":event['created_at']}
                    json_node_events.append(register_event)
            # event for leaving a practice
            elif event['event_type']=="removed_from":
                register_event={ "user":user, "course":course,"host":host, "type":"removed_from","report":None,"practice":event['subject_id'], "date":event['created_at']}
                json_node_events.append(register_event)
            # platform deletion event: very rare
            elif event['event_type']=="removed":
                register_event={ "user":user, "course":course,"host":host,"type":"removed","report":None,"practice":current_practice, "date":event['created_at']}
                json_node_events.append(register_event)

    events_frame=DataFrame(json_node_events)
    return events_frame
Example 11
	def executa_query(self, query, get_data_frame=False):
		dados = None
		
		if (get_data_frame is False):
			dados = psql.frame_query(query, con=self.conexao).to_dict()
		else:
			dados = psql.frame_query(query, con=self.conexao)

		if (dados is None):
			raise ResultadoConsultaNuloError("The pandas library is not installed, or no data was passed to this method")
		else:
			return dados
Example 12
def MySQL_getdata(sql_table,train_per,test_per,features):
	#Getting data from MySql
	con = MySQLdb.connect(host="opt0.madvertise.net",user="******", passwd="27f7b0bc",db="dse")
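	# The test-period dates are quoted and joined into a SQL IN ('d1','d2',...) list below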
		
	sql = "SELECT "+', '.join(features)+", SUM(`clicks`) as clicks, SUM(`instances`) as instances FROM `"+sql_table+"` WHERE date in ('"+'\',\''.join(test_per)+"') GROUP BY "+ ', '.join(features)+";"
	test_df = psql.frame_query(sql,con)
		
	sql = "SELECT "+', '.join(features)+", SUM(`clicks`) as clicks, SUM(`instances`) as instances FROM `"+sql_table+"` WHERE date >= '"+train_per[0]+"' AND date <= '"+train_per[1]+"' GROUP BY "+ ', '.join(features)+";"
	train_df = psql.frame_query(sql,con)
	
	con.close()
	
	return train_df,test_df
Example 13
def main():
    global snp
    global nasdaq
    global usa_events
    global usa_processed
    with mysql_cn:
        snp = psql.frame_query(config.SQL.get('SNP'), con=mysql_cn)
        nasdaq = psql.frame_query(config.SQL.get('NASDAQ'), con=mysql_cn)
        usa_events = psql.frame_query(config.SQL.get('USA_EVT'), con=mysql_cn)
        usa_processed = psql.frame_query(config.SQL.get('USA_PROCESSED'), con=mysql_cn)
        #fill_trading_stats_in_db()
        #fill_event_severity()
        show_stocks_correlation()
        calc_correlation()
Example 14
def user_aggregation():
    # Connect to the MySQL database
    dashboard=db.mySQLConect("localhost","root","lego","dashboard")
    foros=db.mySQLConect("localhost","root","lego","rita")
    # Read the autoes nodes
    nodos= psql.frame_query('select * from nodes;', dashboard.db)
    # Read the forum users
    usuarios=psql.frame_query('select * from user;', foros.db)
    # List of users: they will later be inserted into Mongo, but first they must be computed correctly
    final_users=[]
    # List of special users that have a specific, hand-made association
    personal_rel=[(5,8),(1,25),(1,42),(9,15),(11,31),(6,47),(7,10),(7,41),(13,22),(14,77),(12,9),(15,11),(20,34),(23,45),(23,35),(24,36),(38,38)]
    # For each forum user
    for idx,user in usuarios.iterrows():
        # Build a candidate hostname and look it up in the table
        hostname=user['name'].lower().split()[0][0]+user['name'].lower().split()[1].encode()
        autoes_user=nodos[nodos['name'].str.contains(hostname)]
        # If we found it
        if len(autoes_user)>0:
            for idx,au in autoes_user.iterrows():
                if(au['created_at'].date()<datetime.strptime('08/01/11', "%m/%d/%y").date()): 
                    course='2010-2011'
                elif(au['created_at'].date()<datetime.strptime('08/01/12', "%m/%d/%y").date()): 
                    course='2011-2012'
                else:
                    course='2012-2013'
                new_user={"code":au['id'], "user":user['iduser'], "name":user['name'], "host":au['name'], "course":course}
                final_users.append(new_user)
        # users whose name is irregular: we looked them up by hand :(
        elif user['iduser'] in (x[0] for x in personal_rel):
            hosts=[(x,y) for (x,y) in personal_rel if x == user['iduser']]
            for host in hosts:
                au=nodos[nodos.id==host[1]].iloc[0]
                if(au['created_at'].date()<datetime.strptime('08/01/11', "%m/%d/%y").date()): 
                    course='2010-2011'
                elif(au['created_at'].date()<datetime.strptime('08/01/12', "%m/%d/%y").date()): 
                    course='2011-2012'
                else:
                    course='2012-2013'
                code=long(au['id'])
                host=au['name']
                new_user={"code":code, "user":user['iduser'], "name":user['name'], "host":host, "course":course}
                final_users.append(new_user)
        # Otherwise
        else:
            new_user={"code":None, "user":user['iduser'], "name":user['name'], "host":None, "course": None}
            final_users.append(new_user)
    # Manual adjustments
    users_frame=DataFrame(final_users)
    return users_frame 
Example 15
    def executa_query(self, query, get_data_frame=False):
        dados = None

        if (get_data_frame is False):
            dados = psql.frame_query(query, con=self.conexao).to_dict()
        else:
            dados = psql.frame_query(query, con=self.conexao)

        if (dados is None):
            raise ResultadoConsultaNuloError(
                "A biblioteca pandas não está instalada, ou nenhum dado foi passado a esse método"
            )
        else:
            return dados
Example 16
def clidesc_getStationsByCountry(conn, country):
    """
    Gets all the station details for all stations in a given country code (single or vector).
    It opens the connection, reads the stations table
    country must be a string, with country codes (if more than one)
    separated by ','

    Parameters
    ----------

    conn : db connection
        A database connection object as returned by clidesc_open()

    country : string
        The two letters country identifier
        e.g. 'WS' for Samoa, 'FJ' for Fiji, ...

    Returns
    -------

    table : Pandas.DataFrame
        A Pandas.DataFrame object containing the list of stations
        available for the country

    """

    if ',' in country:
        country = country.replace(',','\',\'')

    # builds the query string
    query = """SELECT * FROM stations WHERE country_code IN ('%s') ORDER BY country_code, station_no""" % ( country )
    # get the table returned by the query as a pandas dataframe
    table = psql.frame_query(query, conn)
    return table
Example 17
def read_cgc():
    """Gets the genes from the cancer gene census.

    Data from CGC is available from here:
    http://cancer.sanger.ac.uk/cancergenome/projects/census/

    Returns
    -------
    cgc_in_database : tuple
        tuple of gene names in cancer gene census also in COSMIC
    """
    cfg_opts = get_input_config('input')
    cgc_path = os.path.join(proj_dir, cfg_opts['cgc'])
    with open(cgc_path, 'r') as handle:
        cgc = tuple(gene.strip() for gene in handle.readlines())

    # open connection
    try:
        # if DB is not created this will throw an error
        gene_db_path = os.path.join(proj_dir, get_db_config('2020plus')['db'])
        conn = sqlite3.connect(gene_db_path)

        sql = ("SELECT DISTINCT Gene"
               " FROM mutation"
               " WHERE Gene in " + str(cgc))
        df = psql.frame_query(sql, con=conn)
        conn.close()  # close connection

        # get significantly mutated genes found in database
        cgc_in_database = tuple(df['Gene'].astype(str))
        logger.debug('There are only %d/%d CGC genes found in the database.' %
                     (len(cgc_in_database), len(cgc)))
    except:
        cgc_in_database = cgc
    return cgc_in_database
Example 18
def clidesc_getColumns(conn, table):
    """
    Return the column names of a table

    Parameters
    ----------

    conn : the postgresql connection
    table : string
        the table (e.g. 'obs_daily', 'obs_subdaily')

    Returns
    -------
    columns : Pandas.DataFrame
        A Pandas.DataFrame containing the column names

    Usage
    -----

    columns = clidesc_getColumns(conn, 'obs_subdaily')
    columns = clidesc_getColumns(conn, 'obs_daily')
    """

    query = "SELECT column_name FROM information_schema.columns WHERE table_name = '{}'".format(
        table)
    columns = psql.frame_query(query, conn)
    return columns
Example 19
 def data_frame(self, csv_file, dbhost, dbport, dbname, dbuser, dbpassword,
                sql_file, sqlparams):
     if csv_file:
         df = read_csv(csv_file)
         if sqlparams:
             params = dict([params.split('=') for params in sqlparams])
             for column in params:
                 filters = params[column].replace("'", '').split(',')
                 df = df[df[column].isin(filters)]
         self.connection = None
     else:
         query = open(sql_file).read()
         params = dict([params.split('=') for params in sqlparams])
         connstring = "host='%(host)s' port='%(port)s'" \
                      "dbname='%(dbname)s' user='******'" \
                      "password='******'"
         connstring = connstring % {'host': dbhost,
                                    'port': dbport,
                                    'dbname': dbname,
                                    'user': dbuser,
                                    'password': dbpassword}
         conn = psycopg2.connect(connstring or self.report['connstring'])
         self.connection = conn
         df = psql.frame_query(query % params, con=conn)
     return df
Example 20
def main_cosmic(options):
    """Main function used to process COSMIC data."""
    # get configs
    in_opts = _utils.get_input_config('classifier')
    out_opts = _utils.get_output_config('features')
    count_opts = _utils.get_output_config('feature_matrix')
    # result_opts = _utils.get_input_config('result')
    db_cfg = _utils.get_db_config('2020plus')

    # get mutations
    conn = sqlite3.connect(db_cfg['db'])
    sql = ("SELECT Gene, Protein_Change as AminoAcid, "
           "       DNA_Change as Nucleotide, "
           "       Variant_Classification, "
           "       Tumor_Sample, Tumor_Type "
           "FROM mutations")
    mut_df = psql.frame_query(sql, con=conn)
    conn.close()

    # get features for classification
    all_features = futils.generate_features(mut_df, options)

    # save features to text file
    cols = all_features.columns.tolist()
    new_order = ['gene'] + cols[:cols.index('gene')] + cols[cols.index('gene') + 1:]
    all_features = all_features[new_order]  # make the gene name the first column
    out_path = _utils.save_dir + in_opts['gene_features'] if not options['output'] else options['output']
    all_features.to_csv(out_path, sep='\t', index=False)
Example 21
def clidesc_stations(conn, stations):
    """
    Gets all the station details for a station number
    It opens the connection, reads the stations table
    station_no is a string, with stations separated by ','

    Parameters
    ----------

    conn : db connection
        Database connection object as returned by clidesc_open()

    stations : string
        The station or station list
        if several station should be passed as string, not list
        e.g. 'XXXXX, YYYYY, ZZZZZ'

    Returns
    -------

    table : Pandas.DataFrame
        A Pandas.DataFrame object containing the station
        information
    """

    if isinstance(stations, str) and ',' in stations:
        stations = stations.replace(',','\',\'')

    # builds the query string
    query = """SELECT * FROM stations WHERE station_no IN ('%s') ORDER BY station_no""" % (stations)

    # get the table returned by the query as a pandas dataframe
    table = psql.frame_query(query, conn)
    return table
Example 22
def clidesc_getColumns(conn, table):
    """
    Return the column names of a table

    Parameters
    ----------

    conn : the postgresql connection
    table : string
        the table (e.g. 'obs_daily', 'obs_subdaily')

    Returns
    -------
    columns : Pandas.DataFrame
        A Pandas.DataFrame containing the column names

    Usage
    -----

    columns = clidesc_getColumns(conn, 'obs_subdaily')
    columns = clidesc_getColumns(conn, 'obs_daily')
    """

    query = "SELECT column_name FROM information_schema.columns WHERE table_name = '{}'".format(table)
    columns = psql.frame_query(query, conn)
    return columns
Example 23
def L32pred():
    db = MySQLdb.connect("localhost", "root", "", "ge_hackathon")
    cursor = db.cursor()
    sql = "SELECT * FROM answers WHERE finished='0'"
    data = frame_query(sql, db)
    URL = 'DUMB.csv'
    DUMB = pandas.read_csv(URL)
    N = '22'
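    # with the old pandas targeted here, Index + Index performs a set union of the column labels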
    if N == '21':
        Questions = list(data.columns[2:8]) + list(data.columns[10:15])
        Dumbster = DUMB[DUMB.columns[1:7] + DUMB.columns[9:14]]
    elif N == '22':
        Questions = list(data.columns[2:8]) + list(data.columns[17:21])
        Dumbster = DUMB.columns[1:7] + DUMB.columns[16:20]
    elif N == '23':
        Questions = list(data.columns[2:8]) + list(data.columns[23:27])
        Dumbster = DUMB.columns[1:7] + DUMB.columns[22:26]
    else:
        Questions = list(data.columns[2:8]) + list(data.columns[29:33])
        Dumbster = DUMB.columns[1:7] + DUMB.columns[28:32]
    df_DUMB = DUMB[Dumbster + DUMB.columns[39:42]]
    df_test = data[Questions + list(data.columns[40:43])]
    frames = [df_DUMB, df_test]
    df_WH = pandas.concat(frames)
    #ENCODING
    X_test = df_WH.to_dict('records')
    X_te = []
    X_te.extend(X_test)
    encoder = DictVectorizer(sparse=True)
    X_encoded_test = encoder.fit_transform(X_te)
    clf2 = joblib.load('EstL32.pkl')
    Predictions = clf2.predict(X_encoded_test.toarray()[-1])
    return str(int(Predictions))
Example 24
def clidesc_stations(conn, stations):
    """
    Gets all the station details for a station number
    It opens the connection, reads the stations table
    station_no is a string, with stations separated by ','

    Parameters
    ----------

    conn : db connection
        Database connection object as returned by clidesc_open()

    stations : string
        The station or station list
        if several station should be passed as string, not list
        e.g. 'XXXXX, YYYYY, ZZZZZ'

    Returns
    -------

    table : Pandas.DataFrame
        A Pandas.DataFrame object containing the station
        information
    """

    if isinstance(stations, str) and ',' in stations:
        stations = stations.replace(',', '\',\'')

    # builds the query string
    query = """SELECT * FROM stations WHERE station_no IN ('%s') ORDER BY station_no""" % (
        stations)

    # get the table returned by the query as a pandas dataframe
    table = psql.frame_query(query, conn)
    return table
Example 25
def L33Night():
    #L21 Night
    db = MySQLdb.connect("localhost", "root", "", "ge_hackathon")
    cursor = db.cursor()
    sql = "SELECT * FROM answers WHERE finished='1'"
    data = frame_query(sql, db)
    # DUMB.csv holds the dummy/historical rows; load it here as well (as done in L33pred)
    DUMB = pandas.read_csv('DUMB.csv')
    N = '22'
    if N == '21':
        Questions = list(data.columns[2:8]) + list(data.columns[10:15])
        Dumbster = DUMB[DUMB.columns[1:7] + DUMB.columns[9:14]]
    elif N == '22':
        Questions = list(data.columns[2:8]) + list(data.columns[17:21])
        Dumbster = DUMB.columns[1:7] + DUMB.columns[16:20]
    elif N == '23':
        Questions = list(data.columns[2:8]) + list(data.columns[23:27])
        Dumbster = DUMB.columns[1:7] + DUMB.columns[22:26]
    else:
        Questions = list(data.columns[2:8]) + list(data.columns[29:33])
        Dumbster = DUMB.columns[1:7] + DUMB.columns[28:32]
    df_training = data
    df_train = df_training[Questions + list(data.columns[45:49])]
    Y_train = df_train['L33P'].values
    del df_train['L33P']

    #ENCODING
    X_train = df_train.to_dict('records')
    X_tr = []
    X_tr.extend(X_train)
    #One Hot Encoding
    enc = DictVectorizer(sparse=True)
    X_encoded_train = enc.fit_transform(X_tr)
    estimator = GradientBoostingClassifier()
    estimator.fit(X_encoded_train.toarray(), Y_train)
    joblib.dump(estimator, 'EstL33.pkl')
    return 'success'
Example 26
 def select(self, sql, commit=True, df=True):
     """
     Execute a pgsql select query and return
     the result, if there is one, along with
     the console output.
     """
     try:
         if self.debug == True:
             self.info("Select - %s" % sql)

         if df == True:
             df = psql.frame_query(sql, con=self.con)

             if len(df) == 0 and self.debug is True:
                 self.warning("... Empty table")

             if self.debug == True:
                 self.info("... Query executed")
             return df
         else:
             self.cur.execute(sql)
             if self.debug == True:
                 self.info("... Query executed: %s " % self.cur.statusmessage)
             return self.cur.fetchall()

     except psycopg2.DatabaseError, e:
         self.error("%s" % e)
Example 27
def main_cosmic(options):
    """Main function used to process COSMIC data."""
    # get configs
    in_opts = _utils.get_input_config('classifier')
    out_opts = _utils.get_output_config('features')
    count_opts = _utils.get_output_config('feature_matrix')
    # result_opts = _utils.get_input_config('result')
    db_cfg = _utils.get_db_config('2020plus')

    # get mutations
    conn = sqlite3.connect(db_cfg['db'])
    sql = ("SELECT Gene, Protein_Change as AminoAcid, "
            "       DNA_Change as Nucleotide, "
            "       Variant_Classification, "
            "       Tumor_Sample, Tumor_Type "
            "FROM mutations")
    mut_df = psql.frame_query(sql, con=conn)
    conn.close()

    # get features for classification
    all_features = futils.generate_features(mut_df, options)

    # save features to text file
    cols = all_features.columns.tolist()
    new_order = ['gene'] + cols[:cols.index('gene')] + cols[cols.index('gene')+1:]
    all_features = all_features[new_order]  # make the gene name the first column
    out_path = _utils.save_dir + in_opts['gene_features'] if not options['output'] else options['output']
    all_features.to_csv(out_path, sep='\t', index=False)
Example 28
def integrity_check():
    
    date1 = "1986-01-01"
    date2 = "2013-01-01"
    
    db = MySQLdb.connect(user = '******', passwd = 'xxxxxxxx', db = 'db')      
    
    query = ("""SELECT DISTINCT artist,title FROM album WHERE chart_date BETWEEN 
             '{0}' AND '{1}';""".format(date1, date2))
    
    df = psql.frame_query(query, con=db)
    
    db.close()
    
    for tup in df.itertuples():

        if isinstance(tup[1], basestring):
            pass
        else:
            print "Entry {0} -- Artist -- {1} -- Not a string".format(tup[0], tup[1])

        if isinstance(tup[2], basestring):
            pass
        else:
            print "Entry {0} -- Title -- {1} -- Not a string".format(tup[0], tup[2])
    
    return True
Example 29
    def get_sub_recommendations(self, redditor, subs=None):
        """Given a redditor, return recommended subs with scores"""
        redditor_subs = self.load_redditor_subs(redditor, subs=subs)
        if not isinstance(redditor_subs, list) or not len(redditor_subs):
            redditor_subs = DEFAULT_SUBS

        #Filter for similarity
        redditor_subs = [sub for sub in redditor_subs if sub in SUBS]
        base_query = "select sub2, sum(similarity) as similarity from similarity where sub1 in ({})\
                group by sub2 ORDER BY similarity DESC;"

        query = base_query.format(', '.join(
            ["'" + i + "'" for i in redditor_subs]))
        all_sub_scores = pd_psql.frame_query(query, self.db_session.connection)
        all_sub_scores = all_sub_scores[-all_sub_scores.sub2.isin(redditor_subs)]
        all_sub_scores.fillna(0, inplace=True)
        all_sub_scores.sort(columns=['similarity'],
                            ascending=False,
                            inplace=True)
        recommendations = list(all_sub_scores.sub2[(-all_sub_scores.sub2.isin(DEFAULT_SUBS))][:10])
        logging.info('\n\nRECOMMENDATIONS FOR REDDITOR {}:\n'.format(redditor))
        logging.info(recommendations)
        if not len(recommendations):
            recommendations = DEFAULT_SUBS
        return recommendations
Example 30
def clidesc_getStationsByCountry(conn, country):
    """
    Gets all the station details for all stations in a given country code (single or vector).
    It opens the connection, reads the stations table
    country must be a string, with country codes (if more than one)
    separated by ','

    Parameters
    ----------

    conn : db connection
        A database connection object as returned by clidesc_open()

    country : string
        The two letters country identifier
        e.g. 'WS' for Samoa, 'FJ' for Fiji, ...

    Returns
    -------

    table : Pandas.DataFrame
        A Pandas.DataFrame object containing the list of stations
        available for the country

    """

    if ',' in country:
        country = country.replace(',', '\',\'')

    # builds the query string
    query = """SELECT * FROM stations WHERE country_code IN ('%s') ORDER BY country_code, station_no""" % (
        country)
    # get the table returned by the query as a pandas dataframe
    table = psql.frame_query(query, conn)
    return table
Example 31
def main():

    db = mdb.connect(user="******",host="localhost",passwd="konnichiwa1",db="foodgrouper")
    query = 'Select * from foodgrouper.CityPopulation;'
    df = psql.frame_query(query, db)
    cur = db.cursor()
    
    for i in range(157,len(df[['latitude','longitude']])):
        latlong = df[['latitude','longitude']][i-1:i]
        clusters,data,c_info = foodgroups.foodGroups(latlong['latitude'][i-1],latlong['longitude'][i-1])
        
        cluster_results = pd.DataFrame()
        cluster_results['labels'] = clusters['labels']
        cluster_results['core_samples_mask'] = clusters['core_samples_mask']
        cluster_results['eps'] = clusters['eps']
        
        insert_request = "INSERT INTO foodgrouper.requests(latitude,longitude,City,State) VALUES (%f, %f, '%s', '%s');"%(latlong['latitude'][i-1]
                ,latlong['longitude'][i-1],df['City'][i-1:i][i-1],df['State'][i-1:i][i-1])
               
        print "Writing city %i: %s, %s data to DB."%(i,df['City'][i-1:i][i-1],df['State'][i-1:i][i-1])
        
        a = Thread(target=noInterrupt, args=(db,cur,insert_request,data,c_info,cluster_results))
        a.start()
        a.join()
        time.sleep(randint(1,3))

    cur.close()
    db.close()
Example 32
    def getActivityEntries( self ):
        print( len( self.TablesList ) )
        for index in range( len( self.TablesList ) ):

            self.cursor.execute( "select Activity_Type from %s group by Activity_Type" % self.TablesList[index] )

            result_activity = self.cursor.fetchall()

            k = 0
            del self.ActivityList[0:len( self.ActivityList )]
            for t in result_activity:
                self.ActivityList.insert( k, t )
                k = k + 1

            for a in self.ActivityList:

                sql = ( "select * from " + self.TablesList[index] + "  where Activity_Type = '" + str( a[0] ) + "'" )

                df = pd_sql.frame_query( sql, self.db , index_col = "Date" )

                orderedList = {'region':None, 'activity':None , 'Data':None}
                orderedList['region'] = self.TablesList[index]
                orderedList['activity'] = a[0]
                orderedList['Data'] = df
                self.FinalList.append( orderedList )

        return self.FinalList
Example 33
def L33pred():
    db = MySQLdb.connect("localhost", "root", "", "ge_hackathon")
    cursor = db.cursor()
    sql = "SELECT * FROM answers WHERE finished='0'"
    data = frame_query(sql, db)
    URL = 'DUMB.csv'
    DUMB = pandas.read_csv(URL)
    Dumbster = DUMB.columns[1:7]
    df_DUMB = DUMB[Dumbster + DUMB.columns[44:47]]
    Questions = list(data.columns[2:8])
    df_test = data[Questions + list(data.columns[45:48])]
    frames = [df_DUMB, df_test]
    df_WH = pandas.concat(frames)

    #ENCODING
    X_test = df_WH.to_dict('records')
    X_te = []
    X_te.extend(X_test)

    encoder = DictVectorizer(sparse=True)
    X_encoded_test = encoder.fit_transform(X_te)
    clf2 = joblib.load('EstL33.pkl')
    Predictions = clf2.predict(X_encoded_test.toarray()[-1])
    Predictions_prob = clf2.predict_proba(X_encoded_test.toarray()[-1])
    return str(int(Predictions))
Example 34
def get_rx_data(rxID, tstart, tstop):
    '''
    Query the GEONET database for data from a receiver in a certain time span.
    INPUTS:
        rxID - integer, receiver ID
        tstart - datetime, start of time window
        tstop  - datetime, end of time window
    OUTPUTS:
        frame - an object containing the desired GPS data:
                rxID, time [POSIX], latitude [deg], longitude [deg], and altitude [m]
    '''
    
    # Open a connection to the database
    con = mdb.connect(host=hostname,user='******',passwd='bdpass',db='gpsdatabase')
    
    # This is the UT POSIX time for the start/stop times of interest
    startUT = calendar.timegm(tstart.timetuple())
    stopUT =  calendar.timegm(tstop.timetuple())
    
    # First find out if the entry is in there (i.e., we are just updating the png and avi file)
    sql_cmd = 'SELECT rxID, UT, lat, lon, alt ' + \
              'FROM rxposition WHERE rxID = %s AND UT >= %d AND UT <= %d' % (rxID, startUT, stopUT)
    frame = psql.frame_query(sql_cmd, con=con)
    con.close()
    
    return frame
Example 35
 def getRawData(self, sFileDB, nStartFrm, nEndFrm):
     sTable = "Tracks"
     self.conn = lite.connect(sFileDB)
     sRange = "Frame>=" + str(nStartFrm) + " and Frame<=" + str(nEndFrm)
     dfRawData = psql.frame_query(
         "SELECT * FROM " + sTable + " WHERE " + sRange, self.conn)
     return dfRawData
Example 36
def run(proj):
    symbolsin=proj.symbol
    causesin=proj.cause
    projid=proj.projid
    ma=multiprocessing.Manager()
    queue=ma.Queue()
    pool_size=multiprocessing.cpu_count()*2
    pool=multiprocessing.Pool(processes=pool_size)
    result=pool.map_async(calcsymbol,((proj,symbol,queue) for symbol in symbolsin))
    time.sleep(1)
    causedf=None
    while not queue.empty():
        print "gone in to loop now"
        item=queue.get()
        #assure item[1] type
        symbolid=item[0]
        if isinstance(item[1],int):
            if item[1]==0:
                #update the symbol status
                try:
                    conn=MySQLdb.connect(host='localhost',user='******',passwd='david',db='david_trade')
                    cur=conn.cursor()
                    expression="update symbol set status=0 where symbolid="+str(symbolid)+";"
                    #tradedb.update('symbol',where="symbolid =$symbolid",vars={'symbolid':symbolid}, btstatus = 0)
                    print "update 0"
                    cur.execute(expression)
                    conn.commit()
                    cur.close()
                    conn.close()
                except MySQLdb.Error,e:
                    print "error in update 0  "
                    print "Mysql Error %d:%s" %(e.args[0],e.args[1])
            elif item[1]==1:
                print "update select cause"
                resultid=str(projid)+'_'+str(symbolid)
                try:
                    conn=MySQLdb.connect(host='localhost',user='******',passwd='david',db='david_trade')
                    cur=conn.cursor()
                    expression="select btstatus from symbol where symbolid="+str(symbolid)+";"
                    cur.execute(expression)
                    #check btstatus to continue calc backtester
                    #will have a condition query from mysql
                    btstatus=cur.fetchone()
                    # fetchone() returns a row tuple, so check its first column
                    if btstatus is not None and btstatus[0]==1:
                        return
                    else:
                        #select the causetable from mysql to calc backtester
                        resultid=str(projid)+'_'+str(symbolid)
                        tablename=resultid+'_result'
                        expression="select * from "+tablename+";"
                        causedf=psql.frame_query(expression, con=conn)
                        #generate resultdf with cp and ncp for cause
                        causedf.set_index('Date',drop=True,inplace=True)
                    conn.commit()
                    cur.close()
                    conn.close()
                    calcbtcor(proj,causedf,resultid)
                except MySQLdb.Error,e:
                    print "error in select cause"
                    print "Mysql Error %d:%s" %(e.args[0],e.args[1])
Example 37
def read_olfactory_receptors():
    """Reads in the significant olfactory receptors from Mutsigcv.

    Returns
    -------
    olfactory : tuple
        tuple of gene names considered as olfactory receptors
    """
    cfg_opts = get_input_config('input')
    or_path = os.path.join(proj_dir, cfg_opts['olfactory_receptors'])
    with open(or_path, 'r') as handle:
        olfactory = tuple(gene.strip() for gene in handle.readlines())

    # open connection
    try:
        # if the table is not found, just catch the exception
        gene_db_path = os.path.join(proj_dir, get_db_config('2020plus')['db'])
        conn = sqlite3.connect(gene_db_path)

        sql = ("SELECT DISTINCT Gene"
               " FROM mutation"
               " WHERE Gene in " + str(olfactory))

        df = psql.frame_query(sql, con=conn)
        conn.close()  # close connection

        # get significantly mutated genes found in database
        olfactory_in_database = tuple(df['Gene'])
        logger.debug(
            'There are only %d/%d olfactory receptors found in the database.' %
            (len(olfactory_in_database), len(olfactory)))
    except:
        olfactory_in_database = olfactory
    return olfactory_in_database
Example 38
def find_restaurant_for_food(food, city,db):
    sql = "select bid from Acad_reviews where city = \'"+city+"\' AND text like \'%"+food+"%\' AND stars > 4 order by bid"
    df_bid = psql.frame_query(sql, con=db)
    bids = df_bid['bid']
    bid_counter = Counter(bids)
    top3 = bid_counter.most_common(3)
    topRest=[]
    for item in top3:
        bid = item[0]
        sql = "select name,url from acad_restaurants where bid =\'"+bid+"\'"
        df_rest = psql.frame_query(sql, con=db)
        rname = df_rest['name'][0]
        url = df_rest['url'][0]
        rec=[rname,url]
        topRest.append(rec)
    return topRest
Example 39
def find():
    rec =[]
    homecity = request.form['homecity']
    visitcity = request.form['visitcity']
    if not homecity:
        homecity = 'atlanta'
    if not visitcity:
        visitcity = 'new york'
    db = MySQLdb.connect(host = 'localhost',user = '******',passwd = 'insight', db = 'bestfood_AWS')
    cursor = db.cursor()
    cursor.execute("USE bestfood_AWS")
    sql = "select * from acad_topitems where homecity=\'"+visitcity+"\' AND visitcity= \'"+homecity+"\'  order by rank asc limit 3"
    df_mysql = psql.frame_query(sql, con=db)
    foodlist =[]
    for i in range(0,2):
        food = df_mysql['term'][i]
        rname = df_mysql['rest'][i]
        url = df_mysql['url'][i]
        ratio = df_mysql['popularity_ratio'][i]
        rec = [food,rname,url,ratio]
        foodlist.append(rec)
    wordlist=[]
    wordlist.append(rec)
    wordlist.append(visitcity)
    #random.shuffle(foodlist)
    return render_template('results.html', homecity = homecity, visitcity = visitcity, wordlist = foodlist) 
Example 40
def sqlite_table_to_hdf5(db_name, table):
    h5_name = os.path.splitext(db_name)[0]
    h5_group = '%s_%s' % (h5_name, table)

    h5_out = '%s.h5' % h5_name
    print "Storage: %s   Group: %s" % (h5_out, h5_group)
    store = pandas.HDFStore(h5_out)

    storer = store.get_storer('/%s' % h5_group)
    if storer is not None:
        # Already stored something, grab last transaction tid.
        last_item = store.select(h5_group, start=storer.nrows - 1)
        last_tid = int(last_item.index[0][1])
    else:
        last_tid = -1 # all-time data. This might take some time.
    print "Last transaction ID: %d" % last_tid

    db = sqlite3.connect(db_name, detect_types=sqlite3.PARSE_COLNAMES)
    query = """SELECT datetime(timestamp, 'unixepoch') as "ts [timestamp]",
        CAST(price as FLOAT) as price, CAST(amount as FLOAT) as volume, tid
        FROM [%s] WHERE tid > ? ORDER BY tid ASC LIMIT 5000""" % table

    while True:
        dataframe = psql.frame_query(query, con=db, params=(last_tid, ),
                index_col=['ts', 'tid'])

        if not len(dataframe) or last_tid == dataframe.index[-1][1]:
            print "Done."
            break

        print last_tid, dataframe.index[-1][1], len(dataframe)
        store.append(h5_group, dataframe)
        last_tid = int(dataframe.index[-1][1])

    store.close()
Example 41
def get_data(Query):
    import pandas.io.sql as psql
    from pyodbc import connect

    AccessDBFileName = r".\SIF_Testing.accdb"
    conn = connect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=" + AccessDBFileName + ";")
    DataBase = psql.frame_query(Query, conn)
    return DataBase
Example 42
File: ODBC.py Project: kbeigan/PCO
    def __Get_snapshot_of_data(self):
        import pandas.io.sql as psql
        from pyodbc import connect

        AccessDBFileName = r"\\ANUBIS\Dad_Docs\8510\8510 - Process Control Obsolescence\ENGINEERING\Databases\8510-0069 PLC Design Database.accdb"
        conn = connect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=" + AccessDBFileName + ";")
        AllNamesQuery = "SELECT [~qry APM2 Classification Information].* FROM [~qry APM2 Classification Information];"
        return psql.frame_query(AllNamesQuery, conn)
Example 43
 def get_similarity_from_sub(self, sub):
     """Given a sub, return all other subs with similarity"""
     sub_scores = pd_psql.frame_query("SELECT sub2, similarity FROM\
         similarity WHERE sub1 = '{0}' ORDER BY sum DESC;".format(sub), self.db_session.connection)
     sub_scores.similarity = sub_scores.similarity.astype(float)
     sub_scores = sub_scores[sub_scores['similarity']>0]
     sub_scores.set_index('sub2',inplace=True)
     return sub_scores
Example 44
def getDataframeFromDatabase(host, db, un, pw):
    #query = "SELECT package from potential_unfair_apps LIMIT 1000;"
    query = "SELECT package from potential_unfair_apps;"
    print query
    conn = MySQLdb.connect(host=host, user=un, passwd=pw, db=db)

    unfair_apps_df = psql.frame_query(query, conn)

    return unfair_apps_df
Example 45
def load_rows(sql):
    conn = psycopg2.connect(dbname=db, user=username, host=hostname)
    
    try:                              
        df = psql.frame_query(sql, conn)        
    finally:
        conn.close()

    return df
Example 46
def get_all_series_json(config):
    df = ""
    with statmart_utils.get_db_connection() as cursor:
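        # in SQL LIKE, "_" matches any single character; the escaped "\_" forces a literal underscore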
        query = """
                SELECT identifier, description
                FROM series as S 
                WHERE S.identifier like '%s%s%s'
                """ % (config("prefix"), "\_%\_", config("suffix"))
        df = psql.frame_query(query, con=cursor.connection)
    return json.dumps(df.as_matrix().tolist(), indent=4)
Example 47
    def get_reviews(self, imdb_movie_id):
        if not self.in_review_database(imdb_movie_id):
            raise Exception("Movie %d is not in the database" % imdb_movie_id)

        query = """
            SELECT * FROM rs_reviews 
            WHERE rs_imdb_movie_id=%s""" % imdb_movie_id
        df_mysql = psql.frame_query(query, con=self.db)

        return df_mysql
Example 48
 def get_similarity_from_sub(self, sub):
     """Given a sub, return all other subs with similarity"""
     sub_scores = pd_psql.frame_query(
         "SELECT sub2, similarity FROM\
         similarity WHERE sub1 = '{0}' ORDER BY sum DESC;".format(sub),
         self.db_session.connection)
     sub_scores.similarity = sub_scores.similarity.astype(float)
     sub_scores = sub_scores[sub_scores['similarity'] > 0]
     sub_scores.set_index('sub2', inplace=True)
     return sub_scores
Example 49
 def load_redditor_subs(self, redditor, force_update=False):
     redditor_subs = pd_psql.frame_query("SELECT * FROM redditors WHERE redditor = '{0}'".format(redditor), self.db_session.connection)
     redditor_subs = redditor_subs.dropna(axis=1)
     del redditor_subs['redditor']
     if redditor_subs.empty or force_update:
         print '\nRedditor not in the database. Acquiring.\n'
         if check_redditor_exists(redditor):
             return get_redditor_subs(redditor)
         return []
     return list(redditor_subs.columns)
Example 50
def extract_data_from_DB(datetime_format, db_local_path, db_utils_dict):
	with sqlite3.connect(db_local_path, detect_types=sqlite3.PARSE_DECLTYPES) as conn:
	    dataframe_sqlite = psql.frame_query('select ' + db_utils_dict['table_columns'][0] + ', ' + db_utils_dict['table_columns'][1] + 
	    	', ' + db_utils_dict['table_columns'][2] + ' from ' + db_utils_dict['table_name'] , con=conn)
	    # TODO: change the hour to 2 hours more! -> http://pandas.pydata.org/pandas-docs/dev/generated/pandas.tseries.tools.to_datetime.html 
	    # This solution is a bit hacky ... and slow!!!   
	    dataframe_sqlite.index = pd.to_datetime(dataframe_sqlite.pop(db_utils_dict['table_columns'][0]))
	    dataframe_sqlite_UTC = dataframe_sqlite.tz_localize('UTC')
	    dataframe_sqlite_MAD = dataframe_sqlite_UTC.tz_convert(MAD)
	return dataframe_sqlite_MAD
Example 51
def getDataframeFromDatabase(host, db, un, pw):
    #query = "SELECT package from potential_unfair_apps LIMIT 1000;"
    query = "SELECT package from potential_unfair_apps;"
    print query
    conn = MySQLdb.connect(host = host, user = un, passwd = pw, db = db)


    unfair_apps_df = psql.frame_query(query, conn)

    return unfair_apps_df
Example 52
 def get_pandas(self, sql):
     try:
         self._con_cur()
         df = psql.frame_query(sql, con=self._con)
         return self._sanitize_df(df)
     except Exception, e:
         traceback.print_exc()
         print "Error: %s" % str(e)
         self._close()
         raise (e)
Example 53
def clidesc_ObsDaily(conn, channels, stations, from_date, to_date):
    """
    returns the daily observations table for the requested stations, channels and date range.

    Parameters
    ----------

    conn : db connection
        A database connection object as returned by clidesc_open()

    channels : string
        The variable channels in the table

    stations : string
        The station or stations list

    from_date : string
        The starting date in 'YYYY-MM-DD' format
        e.g. '2014-01-01'

    to_date : string
        The end date in 'YYYY-MM-DD' format
        e.g. '2014-01-01'

    Returns
    -------

    table : Pandas.DataFrame
        A Pandas.DataFrame containing the data

    """

    # some string formatting on the stations if more than one
    if isinstance(stations, str) and ',' in stations:
        stations = stations.replace(',', '\',\'')

    inputs = '%s::%s::%s::%s' % (channels, stations, from_date, to_date)

    query = """
    SELECT station_no, TO_CHAR(lsd, 'yyyy-mm-dd') as LSD,
    %s FROM obs_daily WHERE station_no IN ('%s') AND lsd >= TO_TIMESTAMP('%s', 'yyyy-mm-dd')
    AND lsd <= TO_TIMESTAMP('%s', 'yyyy-mm-dd') ORDER BY lsd""" % (
        channels, stations, from_date, to_date)

    # get the table returned by the query as a pandas dataframe
    try:
        table = psql.frame_query(query, conn)
        # the index of the pandas DataFrame is the 'lsd' field, correctly
        # cast to pandas datetime index and named 'timestamp'
        table.index = pd.to_datetime(table.lsd)
        table.index.name = 'timestamp'
        return table
    except:
        print('query failed for %s, was probably malformed' % (inputs))
        return None
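A hypothetical call, following the docstring conventions (the station numbers and channel names below are made up for illustration):

conn = clidesc_open()
table = clidesc_ObsDaily(conn, 'rain, max_air_temp', '91843, 91890', '2014-01-01', '2014-12-31')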
Example 54
def load_estimate_hist(uni_df, start, end, estimate):
    window = timedelta(days=252)
    con = lite.connect(ESTIMATES_BASE_DIR + "ibes.db")    
    date = start

    df_list = list()
    uni_df = uni_df.reset_index()
    while (date < end):
        if date.day == end.day:
            times = [ str(end.hour - 3) + ":" + str(end.minute) ]
        else:
            times = ['16:00']
        endDateStr = date.strftime('%Y%m%d')
        startDateStr = (date - window).strftime('%Y%m%d')
        for time in times:            
            minPeriod = str(int(endDateStr[2:4])) + endDateStr[4:6]
            maxPeriod = str(int(endDateStr[2:4]) + 2) + "00"
            sql = "select * from t_ibes_det_snapshot where timestamp between '{} {}' and '{} {}' and measure = '{}' and forecast_period_ind = 1 and forecast_period_end_date > {} and forecast_period_end_date < {} group by sid, ibes_ticker, estimator, forecast_period_ind, forecast_period_end_date having timestamp = max(timestamp) order by sid, forecast_period_end_date;".format(startDateStr, time, endDateStr, time, estimate, minPeriod, maxPeriod)
            print(sql)
            df = psql.frame_query(sql, con)
            df['value'] = df['value'].astype(str)
            df = df[ df['value'] != '' ]
            #            df['ts'] = pd.to_datetime( date.strftime("%Y%m%d") + " " + time )
            df['date'] = pd.to_datetime( date.strftime("%Y%m%d") )
            df['value'] = df['value'].astype(float)
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            print(df.columns)
            df = pd.merge(uni_df[ uni_df['date'] == date ], df, how='inner', left_on=['sid'], right_on=['sid'], sort=True, suffixes=['', '_dead'])
            df = df[ ~df.duplicated(cols=['date', 'sid', 'estimator']) ]
            df = df.set_index(['date', 'sid'])
            df_list.append(df)
            date += timedelta(days=1)

    df = pd.concat(df_list)
    print("DFEPS")
    print(df)
    #consensus
    result_df = df.groupby(level=['date', 'sid']).agg({'value' : [np.mean, np.median, np.std, 'count', np.max, np.min], 'timestamp' : 'last'})
    result_df.columns = result_df.columns.droplevel(0)
    for column in result_df.columns:
        result_df.rename(columns={column: estimate + '_' + column}, inplace=True)

    #detailed
    df = df.set_index('estimator', append=True)
    print("SEAN2")
    print(df.head())
    df2 = df['value'].unstack(['estimator', 'sid']).fillna(0).diff().iloc[1:].stack(['sid', 'estimator'])
    df2 = df2[ df2 != 0 ]
    df2 = df2.reset_index('estimator').groupby(level=['date', 'sid']).agg(np.mean)
    del df2['estimator']
    df2.columns = [estimate + '_diff_mean']

    result_df = pd.merge(result_df, df2, left_index=True, right_index=True, how='left')

    return result_df
Example 55
def get_series_info_map_json(config):
    df = ""
    with us.get_db_connection() as cursor:
        query = """
                SELECT identifier, originalsource, proximatesource
                FROM series
                WHERE series.identifier like '%s\_%s\_%s'
                """ % (config["prefix"], "%", config["suffix"])
        df = psql.frame_query(query, con=cursor.connection)
        df = df.set_index("identifier")
    return df.to_json(orient="index")
Example 56
def psql2df(conn, columns, table):
    try:
        df = psql.frame_query("SELECT id, price FROM stock_price", conn)
    except Exception as error:
        err = {'error': 'Error: %s' % error}
        log.error(err)
        return err
    finally:
        if conn is not None:
            conn.close()
    return df
Example 57
def getScrapedContent():

    DBNAME = 'newscontent'
    DBUSER = '******'
    PASSWRD = open('password.txt').readline()

    conn = psycopg2.connect(database=DBNAME, user=DBUSER, password=PASSWRD)
    sql = 'SELECT * from stocknews_newscontent'
    df = psql.frame_query(sql, conn)

    return df
Example 58
    def get_movie(self, imdb_movie_id):

        query = """
            SELECT * FROM rs_movies 
            WHERE rs_imdb_movie_id=%s""" % imdb_movie_id
        df_mysql = psql.frame_query(query, con=self.db)

        if len(df_mysql) == 0:
            raise Exception("Movie %d is not in the database" % imdb_movie_id)
        assert len(df_mysql) <= 1
        return df_mysql.ix[0]
Example 59
def get_gene_length():
    # get db
    db_path = get_db_config('2020plus')['db']

    # query for gene length
    conn = sqlite3.connect(db_path)
    sql = "SELECT gene, gene_length FROM gene_features"
    df = psql.frame_query(sql, con=conn)
    df = df.set_index('gene')
    conn.close()

    return df
Example 60
def sqldf(q, env, inmemory=True):
    """
    query pandas data frames using sql syntax

    q: a sql query using DataFrames as tables
    env: variable environment; locals() or globals() in your function
         allows sqldf to access the variables in your python environment
    inmemory: boolean, default True
        if True the database is kept in memory; otherwise it is
        temporarily persisted to disk

    Example
    -----------------------------------------

    # example with a data frame
    df = pd.DataFrame({
        'x': range(100),
        'y': range(100)
    })

    from pandasql import sqldf
    sqldf("select * from df;", locals())
    sqldf("select avg(x) from df;", locals())

    #example with a list

    """

    if inmemory:
        dbname = ":memory:"
    else:
        dbname = ".pandasql.db"
    conn = sqlite.connect(dbname, detect_types=sqlite.PARSE_DECLTYPES)
    tables = _extract_table_names(q)
    for table in tables:
        if table not in env:
            conn.close()
            if not inmemory :
                os.remove(dbname)
            raise Exception("%s not found" % table)
        df = env[table]
        df = _ensure_data_frame(df, table)
        _write_table(table, df, conn)

    try:
        result = frame_query(q, conn)
    except:
        result = None
    finally:
        conn.close()
        if not inmemory:
            os.remove(dbname)
    return result