def _check_roundtrip(self, frame):
        _skip_if_no_MySQLdb()
        drop_sql = "DROP TABLE IF EXISTS test_table"
        cur = self.db.cursor()
        cur.execute(drop_sql)
        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
        result = sql.read_frame("select * from test_table", self.db)

        # HACK!
        result.index = frame.index

        expected = frame
        tm.assert_frame_equal(result, expected)

        frame['txt'] = ['a'] * len(frame)
        frame2 = frame.copy()
        frame2['Idx'] = Index(list(range(len(frame2)))) + 10
        drop_sql = "DROP TABLE IF EXISTS test_table2"
        cur = self.db.cursor()
        cur.execute(drop_sql)
        sql.write_frame(frame2,
                        name='test_table2',
                        con=self.db,
                        flavor='mysql')
        result = sql.read_frame("select * from test_table2",
                                self.db,
                                index_col='Idx')
        expected = frame.copy()
        expected.index = Index(list(range(len(frame2)))) + 10
        tm.assert_frame_equal(expected, result)
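read_frame and write_frame come from the long-deprecated pandas.io.sql module; in current pandas the same roundtrip is written with DataFrame.to_sql and pandas.read_sql_query. A minimal self-contained sketch, using an in-memory SQLite engine as a stand-in for the MySQL connection used above:

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine("sqlite:///:memory:")  # stand-in for the MySQL connection in the test above
frame = pd.DataFrame({"A": [1.0, 2.0, 3.0], "txt": ["a", "a", "a"]})

frame.to_sql("test_table", engine, if_exists="replace", index=False)   # modern replacement for sql.write_frame
result = pd.read_sql_query("select * from test_table", engine)         # modern replacement for sql.read_frame
pd.testing.assert_frame_equal(result, frame)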
Example #2
0
def export_to_urbancanvas(building_df, current_year, urbancanvas_scenario_id):
    import pandas.io.sql as sql
    import psycopg2
    import cStringIO
    conn_string = "host='paris.urbansim.org' dbname='denver' user='******' password='******' port=5433"
    conn = psycopg2.connect(conn_string)
    cur = conn.cursor()

    if urbancanvas_scenario_id == 0:
        query = "select nextval('developmentproject_id_seq');"
        nextval = sql.read_frame(query, conn)
        nextval = nextval.values[0][0]
        query = "select max(id)+1 from scenario_project;"
        id = sql.read_frame(query, conn)
        id = id.values[0][0]
        query = "INSERT INTO scenario(id, name) VALUES(%s, 'Run #%s');" % (
            nextval, nextval)
        cur.execute(query)
        conn.commit()
        query = "INSERT INTO scenario_project(id, scenario, project) VALUES(%s, %s, 1);" % (
            id, nextval)
        cur.execute(query)
        conn.commit()
        query = "select max(id)+1 from scenario_project;"
        id = sql.read_frame(query, conn)
        id = id.values[0][0]
        query = "INSERT INTO scenario_project(id, scenario, project) VALUES(%s, %s, %s);" % (
            id, nextval, nextval)
        cur.execute(query)
        conn.commit()
    else:
        nextval = urbancanvas_scenario_id
    nextval_string = '{' + str(nextval) + '}'
    building_df['projects'] = nextval_string

    valid_from = '{' + str(current_year) + '-1-1}'
    building_df['valid_from'] = valid_from
    building_df['land_area'] = 0
    building_df['tax_exempt'] = 0
    building_df['srcparc_id'] = '0'
    building_df['building_id'] = building_df.index.values
    #building_df['stories'] = 30 ###For testing!
    del building_df['unit_price_residential']
    del building_df['unit_price_non_residential']
    del building_df['building_sqft_per_job']
    del building_df['base_year_jobs']
    del building_df['non_residential_units']
    del building_df['all_units']

    print 'Exporting %s buildings to Urbancanvas database for project %s and year %s.' % (
        building_df.index.size, nextval, current_year)
    output = cStringIO.StringIO()
    building_df.to_csv(output, sep='\t', header=False, index=False)
    output.seek(0)
    cur.copy_from(output,
                  'building',
                  columns=tuple(building_df.columns.values.tolist()))
    conn.commit()

    return nextval
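The INSERT statements above splice values into the SQL text with %; psycopg2 can instead receive them as query parameters, which handles quoting for you. A small sketch under the same assumptions (an open connection conn, cursor cur, and the nextval and id values computed above):

insert_scenario = "INSERT INTO scenario(id, name) VALUES(%s, %s);"
cur.execute(insert_scenario, (nextval, 'Run #%s' % nextval))  # psycopg2 fills the placeholders safely
conn.commit()

insert_link = "INSERT INTO scenario_project(id, scenario, project) VALUES(%s, %s, %s);"
cur.execute(insert_link, (id, nextval, 1))
conn.commit()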
Example #3
0
def request_data_from_db(nations,dt_from,dt_to):
  
    global df_bbData
    # show the Bloomberg and ECOS data combined
    con = lite.connect('../data/nowcasting.db')
    df_idxData = sql.read_frame('select * from idx_data',con=con)
    df_idxIndex = sql.read_frame('select * from idx_desc',con=con)
    df_gdp = sql.read_frame('select * from idx_gdp',con=con)
    
    con.close()
    
    df_idxData.index = pd.to_datetime(df_idxData[df_idxData.columns[0]])
    df_idxData = df_idxData[df_idxData.columns[1:]]
    
    df_gdp.index = pd.to_datetime(df_gdp['date'])
    df_gdp = df_gdp[df_gdp.columns[:-1]]
    
    lst_degIdx = df_idxIndex[df_idxIndex['rgn2'].isin(nations)]['num']
    df_idxData = df_idxData[df_idxData.columns[df_idxData.columns.isin(lst_degIdx)]]
    
    # drop I529 and I530 since they have too little data
    df_idxData = df_idxData.drop(['I529','I530'],1)
    
    df_bbData = df_idxData

    #df_nation = extract_national_df(lst_nation)
    df_quarter,df_month,df_week,df_daily = agg_mmQqWw2(dt_from,dt_to)
    
    #df_gdp = extract_gdp_excel('../data/Ecos_gdp.xlsx','Sheet1')
    df_gdp = df_gdp.ix[df_quarter.index] # trim to only the date range covered by df_quarter
    df_gdp = df_gdp[nations[0].encode('utf-8')] # the first entry is the country, the second is global
    
    df_quarter['gdp'] = df_gdp
    
    return df_quarter,df_month,df_week
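With a plain sqlite3 connection, pandas.read_sql is the current replacement for sql.read_frame, and its index_col/parse_dates arguments can take over the manual index conversion done above. A minimal sketch, assuming the first column of idx_data is named 'date' (that name is an assumption):

import sqlite3
import pandas as pd

con = sqlite3.connect('../data/nowcasting.db')
# one call builds the DatetimeIndex that the function above assembles by hand
df_idxData = pd.read_sql('select * from idx_data', con,
                         index_col='date', parse_dates=['date'])  # 'date' column name is assumed
con.close()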
Example #4
0
    def _check_roundtrip(self, frame):
        _skip_if_no_MySQLdb()
        drop_sql = "DROP TABLE IF EXISTS test_table"
        cur = self.db.cursor()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "Unknown table.*")
            cur.execute(drop_sql)
        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
        result = sql.read_frame("select * from test_table", self.db)

        # HACK! Change this once indexes are handled properly.
        result.index = frame.index
        result.index.name = frame.index.name

        expected = frame
        tm.assert_frame_equal(result, expected)

        frame['txt'] = ['a'] * len(frame)
        frame2 = frame.copy()
        index = Index(lrange(len(frame2))) + 10
        frame2['Idx'] = index
        drop_sql = "DROP TABLE IF EXISTS test_table2"
        cur = self.db.cursor()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "Unknown table.*")
            cur.execute(drop_sql)
        sql.write_frame(frame2, name='test_table2', con=self.db, flavor='mysql')
        result = sql.read_frame("select * from test_table2", self.db,
                                index_col='Idx')
        expected = frame.copy()

        # HACK! Change this once indexes are handled properly.
        expected.index = index
        expected.index.names = result.index.names
        tm.assert_frame_equal(expected, result)
Example #5
0
def get_development_projects():
    conn_string = orca.get_injectable('conn_string')
    if len(conn_string) == 0:
        print 'A "conn_string" injectable must be registered and populated. Skipping export-to-Urban-Canvas.'
        return None
    conn = psycopg2.connect(conn_string)
    cur = conn.cursor()

    print "Loading committed development projects table"
    devproj_query = "select id, placetype_id as building_type_id, duration, buildings_number, average_floors as stories, sqft as non_residential_sqft, sqft_unit as sqft_per_unit, units as residential_units, Name as name, start_date from developmentprojects where committed = 'TRUE';"
    devproj = sql.read_frame(devproj_query,conn)
    devproj['year_built'] = devproj.start_date.astype('object').astype('str')
    devproj.year_built = devproj.year_built.str.slice(start=0, stop=4)
    devproj.year_built = devproj.year_built.astype('int')

    print "Loading development project parcels"
    dp_pcl_query = "select developmentprojects_parcels.development_project, developmentprojects_parcels.parcel_id, parcel.parcel_acres from developmentprojects_parcels, parcel where developmentprojects_parcels.parcel_id = parcel.parcel_id;"
    dp_pcl = sql.read_frame(dp_pcl_query, conn)
    devproject_parcel_ids = dp_pcl.groupby('development_project').parcel_id.max().reset_index()  ##In future, use the parcel_acres field on this tbl too

    scheduled_development_events = pd.merge(devproject_parcel_ids, devproj, left_on='development_project', right_on='id')
    scheduled_development_events = scheduled_development_events.rename(columns={'development_project':'scheduled_development_event_id',
                                                                                'building_type_id':'development_type_id'})
    scheduled_development_events = scheduled_development_events[['scheduled_development_event_id', 'year_built', 'development_type_id', 'stories', u'non_residential_sqft', 'sqft_per_unit', 'residential_units', 'parcel_id']]
    for col in scheduled_development_events:
        scheduled_development_events[col] = scheduled_development_events[col].astype('int')

    return scheduled_development_events
Example #6
0
    def _check_roundtrip(self, frame):
        _skip_if_no_MySQLdb()
        drop_sql = "DROP TABLE IF EXISTS test_table"
        cur = self.db.cursor()
        cur.execute(drop_sql)
        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
        result = sql.read_frame("select * from test_table", self.db)

        # HACK!
        result.index = frame.index

        expected = frame
        tm.assert_frame_equal(result, expected)

        frame['txt'] = ['a'] * len(frame)
        frame2 = frame.copy()
        frame2['Idx'] = Index(list(range(len(frame2)))) + 10
        drop_sql = "DROP TABLE IF EXISTS test_table2"
        cur = self.db.cursor()
        cur.execute(drop_sql)
        sql.write_frame(frame2, name='test_table2', con=self.db, flavor='mysql')
        result = sql.read_frame("select * from test_table2", self.db,
                                index_col='Idx')
        expected = frame.copy()
        expected.index = Index(list(range(len(frame2)))) + 10
        tm.assert_frame_equal(expected, result)
Example #7
0
def ReadPatternsFromDB(codearg,typearg,namearg,mode):

    code = codearg #'097950'#'005930' #'005380'#009540 #036570
    if typearg == 1:
        symbol = 'GOOG/KRX_'+code
    elif typearg == 2:
        symbol = 'GOOG/KOSDAQ_'+code
    elif typearg == 3:
        symbol = 'GOOG/INDEXKRX_KOSPI200'  
    # symbol = 'GOOG/INDEXKRX_KOSPI200'
    startdate = '2014-01-01'
    # enddate = '2008-12-30'
    print symbol
    
    '''
    pattern read
    '''
    
    dbname = 'pattern_db_'+codearg+'_'+namearg+'.sqlite'
    con = sqlite3.connect("../data/pattern/"+dbname)

    query = "SELECT * FROM sqlite_master WHERE type='table'"
    df = pd.io.sql.read_frame(query,con)

    tablelen = len(df)
    print 'tablelen:',tablelen    
    tablename_base = 'result_'+codearg+'_'+namearg
    
    readlist = []    
    for cnt in range(tablelen):
        tablename = tablename_base+'_'+str(cnt)
        # print 'readtable:',tablename
        patterndf = pd_sql.read_frame("SELECT * from "+tablename, con)
        readlist.append(PatternData(patterndf))
        readlist[cnt].patterndf.index = readlist[cnt].patterndf['Date']
        readlist[cnt].patterndf = readlist[cnt].patterndf.drop('Date',1)


    # print 'read pattern:',readlist[0].patterndf
    # print 'org patternAr:',patternAr_org[0].patterndf
    
    # con.close()    
    dbname = 'extractid_db_'+codearg+'_'+namearg+'.sqlite'
    con2 = sqlite3.connect("../data/pattern/"+dbname)
    tablename = 'result_'+codearg+'_'+namearg
    extractdf = pd_sql.read_frame("SELECT * from "+tablename, con2)
    extractids = extractdf['ExtractId'].values

    # print 'read pattern:'
    # print readlist[0].patterndf
    print 'extractids:',extractids,len(extractids)
    
    con.close()        
    con2.close()        
    print 'extractid save done'
    return readlist,extractids
Example #8
0
def update_apsim_output_table(masterDbConn, runPath, update):
    '''
    Updates the apsimOutput table in the master run database. If a run
    is already there it is updated, otherwise it is added.
    
    Parameters
    ----------
    masterDbConn : sqlite connection object
        master database to connect to
    runPath : string
        path to the run folder for the apsimData.sqlite database for a 
        particular run
    update : bool
        if the database needs to be updated or if it is the first commit for a
        particular run
        
    Returns
    -------
    Nothing.
    '''
    # get the runId
    runId = int(os.path.split(runPath)[1])
    
    # don't do anything if the database is being updated
    if update == True:
        print "*** Warning: Run {} data may already exist. Skipping write.".format(runId)
        return
    
    # get sow start from parameters table
    sql = "SELECT sow_start FROM runParameters WHERE run_id = {}".format(runId)
    sowStart = psql.read_frame(sql, masterDbConn).ix[0][0]
    
    # check to see if sow date is auto (determined from lookup table)
    if sowStart == 'auto':
        # read sow start for each location
        sql = "SELECT point_id, sow_start FROM gridPoints"
        sowDates = psql.read_frame(sql, masterDbConn, index_col='point_id')
    else:
        # set sow start the same for each location
        sql = "SELECT point_id FROM gridPoints"
        gridPoints = psql.read_frame(sql, masterDbConn)
        sowDates = pandas.DataFrame([sowStart] * len(gridPoints), index=gridPoints['point_id'])
    
    # get the run database path
    apsimDbPath = os.path.join(runPath, 'data', 'apsimData.sqlite')
    
    # read and convert to yearly formatted data
    apsimData = _apsim_output(apsimDbPath, sowDates)
    
    # add column with runId
    runIdSeries = pandas.Series([runId] * len(apsimData))
    apsimData['run_id'] = runIdSeries
    
    # write runData to master database
    psql.write_frame(apsimData, 'apsimOutput', masterDbConn, if_exists='append')
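psql.write_frame(..., if_exists='append') maps onto DataFrame.to_sql in current pandas, which also accepts a raw sqlite3 connection. A minimal sketch with stand-in data:

import sqlite3
import pandas as pd

masterDbConn = sqlite3.connect(':memory:')                        # stand-in for the master run database
apsimData = pd.DataFrame({'point_id': [1, 2], 'run_id': [7, 7]})  # stand-in for the yearly run data
apsimData.to_sql('apsimOutput', masterDbConn, if_exists='append', index=False)  # replaces psql.write_frame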
Example #9
0
def export_to_urbancanvas(building_df,current_year,urbancanvas_scenario_id):
    import pandas.io.sql as sql
    import psycopg2
    import cStringIO
    conn_string = "host='paris.urbansim.org' dbname='denver' user='******' password='******' port=5433"
    conn=psycopg2.connect(conn_string)
    cur = conn.cursor()
    
    if urbancanvas_scenario_id == 0:
        query = "select nextval('developmentproject_id_seq');"
        nextval = sql.read_frame(query,conn)
        nextval = nextval.values[0][0]
        query = "select max(id)+1 from scenario_project;"
        id = sql.read_frame(query,conn)
        id = id.values[0][0]
        query = "INSERT INTO scenario(id, name) VALUES(%s, 'Run #%s');" % (nextval,nextval)
        cur.execute(query)
        conn.commit()
        query = "INSERT INTO scenario_project(id, scenario, project) VALUES(%s, %s, 1);" % (id,nextval)
        cur.execute(query)
        conn.commit()
        query = "select max(id)+1 from scenario_project;"
        id = sql.read_frame(query,conn)
        id = id.values[0][0]
        query = "INSERT INTO scenario_project(id, scenario, project) VALUES(%s, %s, %s);" % (id,nextval,nextval)
        cur.execute(query)
        conn.commit()
    else:
        nextval = urbancanvas_scenario_id
    nextval_string = '{' + str(nextval) + '}'
    building_df['projects'] = nextval_string
    
    valid_from = '{' + str(current_year) + '-1-1}'
    building_df['valid_from'] = valid_from
    building_df['land_area'] = 0
    building_df['tax_exempt'] = 0
    building_df['srcparc_id'] = '0'
    building_df['building_id'] = building_df.index.values
    #building_df['stories'] = 30 ###For testing!
    del building_df['unit_price_residential']
    del building_df['unit_price_non_residential']
    del building_df['building_sqft_per_job']
    del building_df['base_year_jobs']
    del building_df['non_residential_units']
    del building_df['all_units']
    
    print 'Exporting %s buildings to Urbancanvas database for project %s and year %s.' % (building_df.index.size,nextval,current_year)
    output = cStringIO.StringIO()
    building_df.to_csv(output, sep='\t', header=False, index=False)
    output.seek(0)
    cur.copy_from(output, 'building', columns =tuple(building_df.columns.values.tolist()))
    conn.commit()
    
    return nextval
Example #10
0
 def get_val_from_uc_db(query):
     try:
         result = sql.read_frame(query, conn)
         return result.values[0][0]
     except:
         conn=psycopg2.connect(conn_string)
         cur = conn.cursor()
         orca.add_injectable('uc_conn', conn)
         orca.add_injectable('uc_cur', cur)
         result = sql.read_frame(query, conn)
         return result.values[0][0]
Example #11
0
 def get_val_from_uc_db(query):
     try:
         result = sql.read_frame(query, conn)
         return result.values[0][0]
     except:
         conn = psycopg2.connect(conn_string)
         cur = conn.cursor()
         sim.add_injectable('uc_conn', conn)
         sim.add_injectable('uc_cur', cur)
         result = sql.read_frame(query, conn)
         return result.values[0][0]
Example #12
0
 def readDFFromDB(self, table_name, limit=None):
     if not limit:
         df = pd_sql.read_frame('select * from %s' % table_name, self._connection)
     else:
         df = pd_sql.read_frame('select * from %s limit %s' % (table_name, limit), self._connection)
     try:
         df.index = pd.DatetimeIndex(df['date'])
         df.pop('date')
     except:
         self._log.error('** Creating dataframe index from sqlite read')
     return df
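The try/except that builds the DatetimeIndex by hand can usually be dropped in current pandas by asking read_sql for the index directly. A sketch, assuming a table with a 'date' column as in the method above (the file and table names here are placeholders):

import sqlite3
import pandas as pd

connection = sqlite3.connect('example.db')             # placeholder database file
df = pd.read_sql('select * from prices', connection,   # 'prices' is a placeholder table name
                 index_col='date', parse_dates=['date'])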
Example #13
0
 def get_val_from_uc_db(query):
     try:
         result = sql.read_frame(query, conn)
         return result.values[0][0]
     except:
         conn=psycopg2.connect(conn_string)
         cur = conn.cursor()
         orca.add_injectable('uc_conn', conn)
         orca.add_injectable('uc_cur', cur)
         result = sql.read_frame(query, conn)
         result2 = sql.read_frame("select column_name from Information_schema.columns where table_name like 'building' ", conn)
         print result2
         return result.values[0][0]
Example #14
0
	def load(self, product, **kwargs):
		dnow = datetime.datetime.now()
		fsettle = self.DATA + 'settle/' + product.lower() + '.sql'
		flive = self.DATA + 'live/' + product.lower() + '.sql'
		
		conn = sqlite3.connect(fsettle, detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
		cur = conn.cursor()
		cur.execute('ATTACH \"%s\" AS live' % (flive))
		
		# build the basic query
		#query = 'SELECT timestamp as "[timestamp]" FROM %s'
		query = 'SELECT * FROM %s'
		conj = ' WHERE '
		if 'start' in kwargs:
			query += (conj + 'timestamp >= "%s"' % kwargs.get('start'))
			conj = ' AND '
		if 'end' in kwargs:
			query += (conj + 'timestamp < "%s"' % kwargs.get('end'))
			conj = ' AND '
		query += ' ORDER BY timestamp'
		
		self.opt_settle = sql.read_frame(query % 'options', conn)
		self.opt_settle['timestamp'] = self.opt_settle['timestamp'].apply(Timestamp)
		self.opt_settle['month'] = self.opt_settle['month'].apply(str)
		self.opt_settle.set_index(['timestamp', 'month', 'strike'], inplace=True)
		
		self.fut_settle = sql.read_frame(query % 'futures', conn)
		self.fut_settle['timestamp'] = self.fut_settle['timestamp'].apply(Timestamp)
		# how you multi-index depends on how you are going to use it. For a timeseries of ERM4 you want
		# to index by ['mon', 'timestamp'], while for looking at the evolution of the curve it would be
		# timestamp month. Note the need-for-sortedness too. 
		self.fut_settle.set_index(['mon', 'timestamp'], inplace=True)	
		self.fut_settle.sortlevel(0, inplace=True) # Do we need this? would have been better to do it when snapping. 
		
		if not self.valid_database('live.sqlite_master', cur):
			self.opt_live = None
			self.fut_live = None
			return
		
		self.fut_live = sql.read_frame(query % 'live.futures', conn)
		self.fut_live['timestamp'] = self.fut_live['timestamp'].apply(Timestamp)
		self.fut_live.set_index(['mon', 'timestamp'], inplace=True)	
		self.fut_live.sortlevel(0, inplace=True) 
		
		self.opt_live = sql.read_frame(query % 'live.options', conn)
		self.opt_live['timestamp'] = self.opt_live['timestamp'].apply(Timestamp)
		self.opt_live['month'] = self.opt_live['month'].apply(str)
		self.opt_live.set_index(['timestamp', 'month', 'strike'], inplace=True)
		
		conn.close()
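sortlevel, used on the settle frames above, was later replaced by sort_index(level=...). A small stand-alone sketch of the same multi-index sort:

import pandas as pd

fut_settle = pd.DataFrame(
    {'price': [99.50, 99.62]},
    index=pd.MultiIndex.from_tuples([('M4', '2014-06-02'), ('H4', '2014-01-02')],
                                    names=['mon', 'timestamp']))
fut_settle = fut_settle.sort_index(level=0)  # modern equivalent of sortlevel(0, inplace=True)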
Example #15
0
 def readDFFromDB(self, table_name, limit=None):
     if not limit:
         df = pd_sql.read_frame('select * from %s' % table_name,
                                self._connection)
     else:
         df = pd_sql.read_frame(
             'select * from %s limit %s' % (table_name, limit),
             self._connection)
     try:
         df.index = pd.DatetimeIndex(df['date'])
         df.pop('date')
     except:
         self._log.error('** Creating dataframe index from sqlite read')
     return df
Example #16
0
    def _read_from(self, table, conn):
        """
        Read from table
        """
        get_all = "SELECT * FROM "

        sql_type = self.sql_type

        if sql_type == "PG":
            dframe = sql.read_frame(get_all + '"' + table + '"', conn)

        else:
            dframe = sql.read_frame(get_all + '"' + table + '"', conn)

        return dframe
Example #17
0
def bus_sql_compare():
    '''
    bus_cols=['Bus_Num', 'Bus_Name', 'Bus_kV', 'Code', 'Area_Num', 'Zone_Num', \
              'Owner_Num','Voltage_pu','Angle']    
    '''
    global db
    # Buses present in both files whose Bus_Name changed
    print('        Bus name changes')
    sql_str = '''
    select bus1.Bus_Num, bus1.Bus_Name as Name1, bus2.Bus_Name as Name2
           , bus1.Bus_kV, bus1.Area_Num, bus1.Zone_Num
           , 'name' as Change_type
    from bus1, bus2 
    where bus1.Bus_Num = bus2.Bus_Num 
        and bus1.Bus_Name != bus2.Bus_Name  '''
    bus_diff = sql.read_frame(sql_str, db)

    # Units only in file 1
    print('        buss dropped')
    sql_str = '''
    select bus1.Bus_Num, bus1.Bus_Name as Name1, bus2.Bus_Name as Name2
           , bus1.Bus_kV, bus1.Area_Num, bus1.Zone_Num
           , 'name' as Change_type
    from bus1
    LEFT JOIN bus2
    ON bus1.Bus_Num = bus2.Bus_Num 
    WHERE bus2.Bus_Num is NULL  '''
    bus_drop = sql.read_frame(sql_str, db)

    # concat results
    bus_diff = bus_diff.append(bus_drop)

    # Units only in file 2
    print('        buss added')
    sql_str = '''
    select bus1.Bus_Num, bus1.Bus_Name as Name1, bus2.Bus_Name as Name2
           , bus1.Bus_kV, bus1.Area_Num, bus1.Zone_Num
           , 'name' as Change_type
    from bus2
    LEFT JOIN bus1
    ON bus1.Bus_Num = bus2.Bus_Num 
    WHERE bus1.Bus_Num is NULL  '''
    bus_add = sql.read_frame(sql_str, db)

    # concat results
    bus_diff = bus_diff.append(bus_add)

    return bus_diff
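DataFrame.append, used above to stack the three query results, has since been removed from pandas; pd.concat is the equivalent. A stand-alone sketch with dummy frames in place of the query results:

import pandas as pd

bus_diff = pd.DataFrame({'Bus_Num': [101], 'Change_type': ['name']})   # stand-ins for the three
bus_drop = pd.DataFrame({'Bus_Num': [202], 'Change_type': ['name']})   # result sets returned by
bus_add = pd.DataFrame({'Bus_Num': [303], 'Change_type': ['name']})    # sql.read_frame above
bus_diff = pd.concat([bus_diff, bus_drop, bus_add], ignore_index=True)  # replaces the .append() calls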
Example #18
0
def bus_sql_compare():
    '''
    bus_cols=['Bus_Num', 'Bus_Name', 'Bus_kV', 'Code', 'Area_Num', 'Zone_Num', \
              'Owner_Num','Voltage_pu','Angle']    
    '''
    global db
    # Buses present in both files whose Bus_Name changed
    print('        Bus name changes')
    sql_str = '''
    select bus1.Bus_Num, bus1.Bus_Name as Name1, bus2.Bus_Name as Name2
           , bus1.Bus_kV, bus1.Area_Num, bus1.Zone_Num
           , 'name' as Change_type
    from bus1, bus2 
    where bus1.Bus_Num = bus2.Bus_Num 
        and bus1.Bus_Name != bus2.Bus_Name  '''        
    bus_diff = sql.read_frame(sql_str, db)    
    
    # Units only in file 1
    print('        buss dropped')
    sql_str = '''
    select bus1.Bus_Num, bus1.Bus_Name as Name1, bus2.Bus_Name as Name2
           , bus1.Bus_kV, bus1.Area_Num, bus1.Zone_Num
           , 'name' as Change_type
    from bus1
    LEFT JOIN bus2
    ON bus1.Bus_Num = bus2.Bus_Num 
    WHERE bus2.Bus_Num is NULL  '''        
    bus_drop = sql.read_frame(sql_str, db)    
    
    # concat results
    bus_diff = bus_diff.append(bus_drop)
    
    # Units only in file 2
    print('        buss added')
    sql_str = '''
    select bus1.Bus_Num, bus1.Bus_Name as Name1, bus2.Bus_Name as Name2
           , bus1.Bus_kV, bus1.Area_Num, bus1.Zone_Num
           , 'name' as Change_type
    from bus2
    LEFT JOIN bus1
    ON bus1.Bus_Num = bus2.Bus_Num 
    WHERE bus1.Bus_Num is NULL  '''        
    bus_add = sql.read_frame(sql_str, db)    
    
    # concat results
    bus_diff = bus_diff.append(bus_add)
    
    return bus_diff
Example #19
0
def parse_data(origin, destination, delta_t = 7, verbose=True):
    '''
    Reads data from flight_data.db for a given origin, destination pair
    and generates / transforms all the data fields necessary for analysis

    Data Name Dictionary:
        QDATE: Query Date
        CXR: Airline
        DFLIGHT: Flight Number
        DTIME: Local Departure Time
        DTD: Days to Departure
        QDAY: Query Day of Week
        DDAY: Departure Day of Week
        DCHUNK: Departure Time of Day, e.g. morning, afternoon, etc.
        DMONTH: Departure Month
        FARE: Current Fare Price in USD
        NFARE: Normalized Fare Price (see process_fare for more information)
        DFARE: Binary representation of forward difference in fare price
               where 1 signifies an increase and 0 a decrease
        DPFARE_i: Percent trailing difference in fare price back i periods

    '''
    if verbose:
        print 'Reading data from flight_data.db...'
    market = ', '.join(["".join(["'",origin, destination,"'"])] + ["".join(["'",destination,origin,"'"])])
    cnx = sqlite3.connect('flight_data_3.db')
    query = 'SELECT QDATE, CXR, DFLIGHT, DDATE, DTIME, FARE \
             from flightdata WHERE MARKET in (%s)' % market
    df = sql.read_frame(query,cnx)

    if verbose:
        print 'Processing Results'
    airlines = list(set(df['CXR']))
    air_code_dict = {airline:i for i,airline in enumerate(airlines)}
    df['CXR'] = df['CXR'].apply(lambda x: air_code_dict[x])
    df['QDATE'] = df['QDATE'].apply(lambda x: dateParser((str(int(x))),0))
    df['DDATE'] = df['DDATE'].apply(lambda x: dateParser(x,1))
    if verbose:
        print 'Computing Days till Departure'
    df['DTD'] = df[['DDATE','QDATE']].apply(day_diff, axis = 1)
    df['DTIME'] = df['DTIME'].apply(lambda x: int(x[:2]))
    df['QDAY'] = df['QDATE'].apply(lambda x: x.weekday())
    df['DDAY'] = df['DDATE'].apply(lambda x: x.weekday())
    df['DCHUNK'] = df['DTIME'].apply(time_of_day)
    df['DMONTH'] = df['DDATE'].apply(lambda x: x.strftime('%m'))

    fare_dict = gen_fare_dict(df)
    if verbose:
        print 'Computing Fare Differences'
    df['DFARE'] = df.apply(lambda x: get_fare_delta(x,delta_t,fare_dict), axis = 1)
    df['DPFARE_1'] = df.apply(lambda x: get_fare_delta(x,delta_t,fare_dict,r_type='percent',time='past'), axis = 1)
    df['DPFARE_2'] = df.apply(lambda x: get_fare_delta(x,delta_t,fare_dict,r_type='percent',time='past'), axis = 1)

    if verbose:
        print 'Normalizing Fare Prices'
    df['NFARE'] = process_fare(df,verbose)

    if verbose:
        print 'Dropping null values'
    return df.dropna()
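The market filter above is spliced into the query text by hand; sqlite3 placeholders passed through read_sql_query's params argument do the same job without manual quoting. A minimal sketch against the same flight_data_3.db layout:

import sqlite3
import pandas as pd

origin, destination = 'SFO', 'JFK'                      # example market pair
cnx = sqlite3.connect('flight_data_3.db')
query = ('SELECT QDATE, CXR, DFLIGHT, DDATE, DTIME, FARE '
         'FROM flightdata WHERE MARKET IN (?, ?)')
df = pd.read_sql_query(query, cnx,
                       params=[origin + destination, destination + origin])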
Example #20
0
    def run(self):
        import pandas.io.sql as sql
        upstreamDistance = self.i.upstreamDistance.val
        downstreamDistance = self.i.downstreamDistance.val

        if self.i.species_genome.val == "mm9":
            srmotifs_tablename = "mm9_gene_srmotifs_detail"
        elif self.i.species_genome.val == "hg19":
            srmotifs_tablename = "hg19_gene_srmotifs_detail"

        query = """ select genename, chrom, txstart-motif_start as tx_start_dist, cdsstart-motif_start as cds_start_dist, 
        txend-motif_start as tx_end_dist, txend-motif_end as tx_end_motif_end_dist, motif_end-motif_start,  
        gene_strand, motif_strand, motif_name, motif_score 
        FROM %(srmotifs_tablename)s WHERE motif_score >= 0.7 AND 
        (
          ( gene_strand = '+' AND (motif_start > (txstart - %(upstreamDistance)d) and motif_start < (txend + %(downstreamDistance)d) ) )
        OR
          ( gene_strand = '-' AND (motif_end > (txstart - %(downstreamDistance)d) and motif_end < (txend + %(upstreamDistance)d) )  )
        )
        """ % {
            "upstreamDistance": upstreamDistance,
            "downstreamDistance": downstreamDistance,
            "srmotifs_tablename": srmotifs_tablename
        }

        self.o.mm9_gene_motifs.val = sql.read_frame(query, conn)
Example #21
0
 def predict(self, predict_table_name, actual_label_col=''):
     ''' 
       Return predicted values using the trained model. Also return precision, recall & f-measure
       Input:
       ======
       predict_table_name : (String) the name of the table to be used for prediction
       actual_label_col : (String) the name of the actual label column (will be ignored if empty)
       
       Output:
       =======
       A dataframe of the prediction results
       
     '''
     #Transform the columns if any of them are categorical
     predict_table_name, _indep, _dep, _discard = pivotCategoricalColumns(self.dbconn,predict_table_name, 
                                                                self.model['indep_org'], 
                                                                actual_label_col,
                                                                self.model['col_distinct_vals_dict']
                                                 )
     stmt = '''
               select *, 
                      {madlib_schema}.array_dot(array{coef}::real[],{indep}) as prediction 
               from {table_name}
            '''.format(
                       coef=self.model['coef'],
                       indep=self.model['indep'],
                       table_name=predict_table_name,
                       madlib_schema=self.dbconn.getMADlibSchema()
                      )
     prediction_results = psql.read_frame(stmt,self.dbconn.getConnection())
     return prediction_results
Example #22
0
def get_qwop(connection,dateBeg,dateEnd,company):
    '''Quantity weighted offer price query, from Ramu'''
    q = """Select
         com.Fp_Offers.DTTM_ID,
         com.Fp_Offers.Trading_DATE as 'Date',
         com.Fp_Offers.Trading_Period as 'TP',
         com.MAP_PNode_to_POC_and_Island.Island,
         com.MAP_Participant_names.Parent_Company_ID,
         (Sum((com.Fp_Offers.Offer_Price * com.Fp_Offers.Offer_Quantity)) /
          Sum(com.Fp_Offers.Offer_Quantity)) As 'QWOP'
      From
         com.Fp_Offers Inner Join
         com.MAP_Participant_names On com.Fp_Offers.Trader_Id =
         com.MAP_Participant_names.Trader_Id Inner Join
         com.MAP_PNode_to_POC_and_Island On com.Fp_Offers.PNode =
         com.MAP_PNode_to_POC_and_Island.PNode
      Where
         com.Fp_Offers.Trading_DATE >= '%s' And
         com.Fp_Offers.Trading_DATE <= '%s' And
         com.Fp_Offers.trade_type = 'ENOF' And
         com.MAP_Participant_names.Parent_Company_ID = '%s' And
         com.MAP_PNode_to_POC_and_Island.Island = 'SI'
      Group By
         com.Fp_Offers.DTTM_ID, com.Fp_Offers.Trading_DATE,
         com.Fp_Offers.Trading_Period, com.MAP_PNode_to_POC_and_Island.Island,
         com.MAP_Participant_names.Parent_Company_ID
      order by
         com.Fp_Offers.DTTM_ID""" % (dateBeg.strftime("%Y-%m-%d"),dateEnd.strftime("%Y-%m-%d"),company)
    t = sql.read_frame(q,connection,coerce_float=True) 
    t['Date'] = t['Date'].map(lambda x: date_converter(x))
    t = t.set_index(['Date','TP']).QWOP
    return t
Example #23
0
def get_rm_demand(connection,dateBeg,dateEnd,company):
    '''rm demand by parent company, from Ramu'''
    q = """Select
        com.RM_Demand_by_trader.DTTM_ID,
        com.RM_Demand_by_trader.Trading_Date,
        com.RM_Demand_by_trader.Trading_Period,
        com.MAP_Participant_names.Parent_Company_ID,
        com.MAP_NSP_POC_to_region.ISLAND,
        Sum(com.RM_Demand_by_trader.RM_demand) As 'RMLoad'
   From
        com.RM_Demand_by_trader Inner Join
        com.MAP_Participant_names On com.RM_Demand_by_trader.Trader_ID =
          com.MAP_Participant_names.Trader_Id Inner Join
        com.MAP_NSP_POC_to_region On com.RM_Demand_by_trader.POC =
          com.MAP_NSP_POC_to_region.POC
   Where
        com.RM_Demand_by_trader.Trading_Date >= '%s' And
        com.RM_Demand_by_trader.Trading_Date <= '%s' And
        com.MAP_Participant_names.Parent_Company_ID = '%s'
   Group By
        com.RM_Demand_by_trader.DTTM_ID, com.RM_Demand_by_trader.Trading_Date,
        com.RM_Demand_by_trader.Trading_Period,
        com.MAP_Participant_names.Parent_Company_ID, com.MAP_NSP_POC_to_region.ISLAND
   Order By
        com.RM_Demand_by_trader.DTTM_ID,
        com.MAP_NSP_POC_to_region.ISLAND""" % (dateBeg.strftime("%Y-%m-%d"),dateEnd.strftime("%Y-%m-%d"),company)
    t = sql.read_frame(q,connection,coerce_float=True) 
    return t
Example #24
0
def getAllDataFrame(sym, startdate, dbconn):
    print "getAllDataFrame", sym, startdate
    sqlstr = "select * from stockeod where symbol='%s' and sdate>='%s'" % (
        sym, startdate)
    #cursor = dbconn.cursor()
    try:
        tb = ""
        #cursor.execute(sql)
        df = sql.read_frame(sqlstr, dbconn)
        #results = cursor.fetchall()
        #df = DataFrame(cursor.fetchall())
        #df.columns = cursor.keys()
        return df

        #import MySQLdb as mdb

        #from pandas import *

        #conn = mdb.connect('<server>','<user>','<pass>','<db>');

    except:
        print "error"
        tb = traceback.format_exc()
        print tb
        return
Example #25
0
def insert_statice():
    conn = MySQLdb.Connect(host='localhost', user='******', passwd='123456', db='cqlianjia_daily_monitoring',charset='utf8')
    cur=conn.cursor()
    #cur.excute('INSERT INTO signal_item_statics (date) VALUE (CURDATE());')
    currentdate = time.strftime('%Y_%m_%d',time.localtime(time.time()))
    sqlname = 'houseprice_'+currentdate
    
    cqpd = sql.read_frame('select area,price,view,unitprice from %s'%sqlname,conn)
    
    items= ['avr unit price_0',
            'avr total price_1',
            'total house quantity_2',
            'total house area_3',
            'MID unit price_4',
            'MID total price_5',
            'house quantity viewed_6',
            'AVR viewed times_7'
            ]
    avr = cqpd.mean()
    mid = cqpd.median(axis = 0)
    sumpd = cqpd.sum()
    items[0] = sumpd[1]/sumpd[0]*10000
    items[1] = avr[1]
    items[2] = len(cqpd)
    items[3] = sumpd[0]
    items[4] = mid[3]
    items[5] = mid[1]
    items[6] = len(cqpd[cqpd.view!=0])
    items[7] = cqpd[cqpd.view!=0].sum()[2]/items[6]
    
    cur.execute("insert into signal_item_statics values(CURDATE(),%s,%s,%s,%s,%s,%s,%s,%s)",items)
    conn.commit()
    cur.close()
    return
Example #26
0
def init_hops():
	g.db = connect_db()
	hopdf = sql.read_frame('select * from hop where id<63',g.db,'name')
	hopdf = hopdf[['alpha','beta','humulene','caryophyllene','cohumulone','myrcene','notes','origin','substitutes']]
	hopdf.to_csv('hopstr/static/hops.csv',index=True, cols=['alpha','beta','humulene','caryophyllene','cohumulone','myrcene'],header=['alpha','beta','humulene','caryophyllene','cohumulone','myrcene'])
	g.db.close()
	return hopdf
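to_csv's cols keyword was later renamed to columns; a sketch of the same export against current pandas, with a stand-in frame in place of the hop table:

import pandas as pd

hopdf = pd.DataFrame({'alpha': [5.5], 'beta': [6.0], 'humulene': [18.0]}, index=['Cascade'])
hopdf.to_csv('hopstr/static/hops.csv', index=True,
             columns=['alpha', 'beta', 'humulene'],
             header=['alpha', 'beta', 'humulene'])  # 'columns' replaces the deprecated 'cols' keyword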
Example #27
0
def read_raw_featurs_from_DB(con):
    
    fields = ['twitter.links',\
    'twitter.user.verified',\
    'twitter.user.listed_count',\
    'twitter.text',\
    'twitter.mentions',\
    'twitter.mention_ids',\
    'klout.score',\
    'twitter.hashtags',\
    'twitter.user.statuses_count',\
    'twitter.user.followers_count',\
    'twitter.user.friends_count',\
    'twitter.user.geo_enabled',\
    'language.confidence',\
    'twitter.user.lang',\
    'twitter.created_at',\
    'twitter.user.created_at',\
    'Newsworthy']
    
    fieldsConc = '[' + '],['.join(fields) + ']'
    
    sqlQuery = "SELECT md.match_rowid , {} FROM MasterData AS md JOIN AnnotationsBatch4 AS an \
    ON md.match_rowid = an.match_rowid \
    WHERE an.Newsworthy IS NOT NULL".format(fieldsConc)
    data = psql.read_frame(sqlQuery, con)
    
    return data
Example #28
0
def get_rm_demand(connection, dateBeg, dateEnd, company):
    '''rm demand by parent company, from Ramu'''
    q = """Select
        com.RM_Demand_by_trader.DTTM_ID,
        com.RM_Demand_by_trader.Trading_Date,
        com.RM_Demand_by_trader.Trading_Period,
        com.MAP_Participant_names.Parent_Company_ID,
        com.MAP_NSP_POC_to_region.ISLAND,
        Sum(com.RM_Demand_by_trader.RM_demand) As 'RMLoad'
   From
        com.RM_Demand_by_trader Inner Join
        com.MAP_Participant_names On com.RM_Demand_by_trader.Trader_ID =
          com.MAP_Participant_names.Trader_Id Inner Join
        com.MAP_NSP_POC_to_region On com.RM_Demand_by_trader.POC =
          com.MAP_NSP_POC_to_region.POC
   Where
        com.RM_Demand_by_trader.Trading_Date >= '%s' And
        com.RM_Demand_by_trader.Trading_Date <= '%s' And
        com.MAP_Participant_names.Parent_Company_ID = '%s'
   Group By
        com.RM_Demand_by_trader.DTTM_ID, com.RM_Demand_by_trader.Trading_Date,
        com.RM_Demand_by_trader.Trading_Period,
        com.MAP_Participant_names.Parent_Company_ID, com.MAP_NSP_POC_to_region.ISLAND
   Order By
        com.RM_Demand_by_trader.DTTM_ID,
        com.MAP_NSP_POC_to_region.ISLAND""" % (
        dateBeg.strftime("%Y-%m-%d"), dateEnd.strftime("%Y-%m-%d"), company)
    t = sql.read_frame(q, connection, coerce_float=True)
    return t
Example #29
0
def _read_apsim_db(apsimDbConn, start, chunksize):
    '''
    Read apsimData.sqlite database.
    
    Parameters
    ----------
    apsimDbConn : sqlite connection object
        connection to database
    start : int
        where to start limiting the data returned
    chunksize : int
        size of chunks to read from the database
        
    Returns
    -------
    A dataframe of daily data.
    '''
    with apsimDbConn:
        # read data from the outputFields table
        outputFields = psql.read_frame("SELECT * FROM outputFields;", apsimDbConn)
        outputFields = list(outputFields['name'])
        outputFields = ', '.join(outputFields)
        
        # read main data
        sql = "SELECT point_id, {outputFields} FROM apsimOutput LIMIT {start}, {chunksize}".format(outputFields=outputFields, start=start, chunksize=chunksize)
        dailyData = pandas.io.sql.read_frame(sql, apsimDbConn)
    
    return dailyData
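The manual LIMIT start, chunksize paging above can also be expressed with read_sql's chunksize argument, which yields the result in pieces. A sketch assuming the same apsimOutput table:

import sqlite3
import pandas as pd

apsimDbConn = sqlite3.connect('apsimData.sqlite')  # placeholder path to the run database
chunks = pd.read_sql('SELECT * FROM apsimOutput', apsimDbConn, chunksize=50000)
dailyData = pd.concat(chunks, ignore_index=True)   # or process each chunk inside a loop instead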
Example #30
0
def update_output_fields_table(masterDbConn, runPath):
    '''
    Updates the outputFields table in the master run database. If a
    field already exists it is skipped, otherwise it is added.
    
    Parameters
    ----------
    masterDbConn : sqlite connection object
        master database to connect to
    runPath : string
        path to the run folder for the apsimData.sqlite database for a 
        particular run
        
    Returns
    -------
    A list of fields that were updated in the table.
    '''
    
    # get the run database path
    apsimDbPath = os.path.join(runPath, 'data', 'apsimData.sqlite')
    
    # open run database
    apsimDbConn = lite.connect(apsimDbPath)
    
    with apsimDbConn:
        # read data from the outputFields table
        outputFields = psql.read_frame("SELECT * FROM outputFields;", apsimDbConn)
        
    with masterDbConn:
        # write outputFields to master database
        try:
            psql.write_frame(outputFields, 'outputFields', masterDbConn)
        except ValueError:# as e: # if table already exists then do nothing
            #print '*** Warning: {} Skipping write.'.format(e)
            pass
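The ValueError swallowed above is what write_frame raises when the target table already exists; DataFrame.to_sql behaves the same way with its default if_exists='fail'. A stand-alone sketch of the equivalent guard:

import sqlite3
import pandas as pd

masterDbConn = sqlite3.connect(':memory:')                    # stand-in for the master run database
outputFields = pd.DataFrame({'name': ['date', 'yield', 'rain']})
try:
    outputFields.to_sql('outputFields', masterDbConn, index=False)  # if_exists defaults to 'fail'
except ValueError:
    pass  # table already exists; nothing to update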
Example #31
0
def get_rm_generation(connection, dateBeg, dateEnd, company):
    '''rm generation by parent company, from Ramu'''
    q = """Select
       com.RM_Generation_by_trader.DTTM_ID,
       com.RM_Generation_by_trader.POC,
       com.MAP_Participant_names.Parent_Company_ID,
       Sum(com.RM_Generation_by_trader.RM_generation) As 'RMGen'
    From
       com.MAP_Participant_names Inner Join
       com.RM_Generation_by_trader On com.RM_Generation_by_trader.Trader_ID =
       com.MAP_Participant_names.Trader_Id
    Where
       com.RM_Generation_by_trader.Trading_Date >= '%s' And
       com.RM_Generation_by_trader.Trading_Date <= '%s' And
       com.MAP_Participant_names.Parent_Company_ID Like '%s'
    Group By
       com.RM_Generation_by_trader.DTTM_ID, com.RM_Generation_by_trader.POC,
       com.MAP_Participant_names.Parent_Company_ID
    Order By
       com.RM_Generation_by_trader.DTTM_ID,
       com.MAP_Participant_names.Parent_Company_ID,
       com.RM_Generation_by_trader.POC""" % (
        dateBeg.strftime("%Y-%m-%d"), dateEnd.strftime("%Y-%m-%d"), company)
    t = sql.read_frame(q, connection, coerce_float=True)
    return t
Example #32
0
def getAllDataFrame(sym,startdate,dbconn):
    print "getAllDataFrame",sym,startdate
    sqlstr = "select * from stockeod where symbol='%s' and sdate>='%s'" % (sym,startdate);
    #cursor = dbconn.cursor()
    try:
        tb=""
        #cursor.execute(sql)
        df = sql.read_frame(sqlstr, dbconn)
        #results = cursor.fetchall()
        #df = DataFrame(cursor.fetchall())
        #df.columns = cursor.keys()    
        return df
        
        #import MySQLdb as mdb

        #from pandas import *

        #conn = mdb.connect('<server>','<user>','<pass>','<db>');

    except:
        print "error"
        tb = traceback.format_exc()
        print tb
        return
Example #33
0
def fetchHistData(codearg,namearg,symbol,startdate):
    print 'fetchHistData'
    dbname = 'hist_db_'+codearg+'_'+namearg+'.sqlite'
    con = sqlite3.connect("../data/hist/"+dbname)

    query = "SELECT * FROM sqlite_master WHERE type='table'"
    df = pd.io.sql.read_frame(query,con)

    tablelen = len(df)
    print 'hist tablelen:',tablelen    
    tablename = 'result_'+codearg+'_'+namearg

    histdf = pd_sql.read_frame("SELECT * from "+tablename, con)
    
    from pandas.lib import Timestamp
    histdf.Date = histdf.Date.apply(Timestamp)
    histdf2 = histdf.set_index('Date')

    histdf2 = histdf2[histdf2.index >= startdate]
    # histdf.index = histdf['Date']
    # histdf = histdf.drop('Date',1)
    print 'histdf from db:'
    print histdf2.head()
    print histdf2.tail()
    print 'hist index type:',type(histdf2.index)
    con.close()
    return histdf2
Example #34
0
    def __init__(self):


        secrets_file = open('secrets.json','rb')
        secrets = json.load(secrets_file)
        secrets_file.close()

        self.blog_name = "wheredidmypostgo"

        # Build an Authorized Tumblr Client
        self.tb_client = pytumblr.TumblrRestClient(**secrets['tumblr_tokens'])
        self.etl_controller = app.etl_controller()

        max_end_date = date.today() - timedelta(days=3)


        sql = """
        select blog_name, avg(ClosenessCentrality) as 'ClosenessCentrality'
        from tb_reblog_graphs
        where reblogged_root_name in (%s)
        and end_date > '%s'
        and blog_name not in ('wheredidmypostgo', %s)
        group by blog_name
        order by avg(ClosenessCentrality) DESC
        """ % ("'"+"','".join(self.etl_controller.target_blogs)+"'", max_end_date.isoformat() , "'"+"','".join(self.etl_controller.target_blogs)+"'")
        
        self.influencer_df = psql.read_frame(sql,self.etl_controller.mysql_connection)
        self.influencer_df['pdf'] = self.influencer_df.ClosenessCentrality / self.influencer_df.ClosenessCentrality.sum()
        self.influencer_df['cdf'] = self.influencer_df.sort(column='pdf',ascending=False).pdf.cumsum()
        
        sql = """
        select tag
        from tb_posts
        inner join tb_posttag_level on tb_posttag_level.`post_id` = tb_posts.id
        where tb_posts.blog_name = 'wheredidmypostgo'
        """
        curs = self.etl_controller.mysql_connection.cursor()
        curs.execute(sql)
        all_tags = curs.fetchall()
            
        self.most_common_tags = [t[0] for t in Counter(all_tags).most_common(n=200)]
        
        curs.close()
        
        response = self.tb_client.posts('wheredidmypostgo', notes_info='true')
        self.posts = response['posts']
        for offset in range(20,response['total_posts'],20):
            response = self.tb_client.posts('wheredidmypostgo', notes_info='true', offset=offset)
            self.posts.extend(response['posts'])
            
        self.notes = []
        
        for p in self.posts:
            if p['note_count'] > 0:
                self.notes.extend(p['notes'])
                
        self.notes_df = pd.DataFrame(self.notes)
        self.notes_df['date'] = self.notes_df.timestamp.apply(float).apply(datetime.fromtimestamp)
        
        self.todays_notes = self.notes_df[self.notes_df.date >= (datetime.now() - timedelta(hours=4))].sort(column='date', ascending=False).head(50)
Example #35
0
def get_qwop(connection, dateBeg, dateEnd, company):
    '''Quantity weighted offer price query, from Ramu'''
    q = """Select
         com.Fp_Offers.DTTM_ID,
         com.Fp_Offers.Trading_DATE as 'Date',
         com.Fp_Offers.Trading_Period as 'TP',
         com.MAP_PNode_to_POC_and_Island.Island,
         com.MAP_Participant_names.Parent_Company_ID,
         (Sum((com.Fp_Offers.Offer_Price * com.Fp_Offers.Offer_Quantity)) /
          Sum(com.Fp_Offers.Offer_Quantity)) As 'QWOP'
      From
         com.Fp_Offers Inner Join
         com.MAP_Participant_names On com.Fp_Offers.Trader_Id =
         com.MAP_Participant_names.Trader_Id Inner Join
         com.MAP_PNode_to_POC_and_Island On com.Fp_Offers.PNode =
         com.MAP_PNode_to_POC_and_Island.PNode
      Where
         com.Fp_Offers.Trading_DATE >= '%s' And
         com.Fp_Offers.Trading_DATE <= '%s' And
         com.Fp_Offers.trade_type = 'ENOF' And
         com.MAP_Participant_names.Parent_Company_ID = '%s' And
         com.MAP_PNode_to_POC_and_Island.Island = 'SI'
      Group By
         com.Fp_Offers.DTTM_ID, com.Fp_Offers.Trading_DATE,
         com.Fp_Offers.Trading_Period, com.MAP_PNode_to_POC_and_Island.Island,
         com.MAP_Participant_names.Parent_Company_ID
      order by
         com.Fp_Offers.DTTM_ID""" % (dateBeg.strftime("%Y-%m-%d"),
                                     dateEnd.strftime("%Y-%m-%d"), company)
    t = sql.read_frame(q, connection, coerce_float=True)
    t['Date'] = t['Date'].map(lambda x: date_converter(x))
    t = t.set_index(['Date', 'TP']).QWOP
    return t
Example #36
0
 def GetRowsDataFrameFromSelect(self, SelectSql):
     conn = self.Connect(as_dict=False)    
     print SelectSql
     df = sql.read_frame(SelectSql, conn)
     
     print '  Rows read = ' + str(df.shape[0])
     conn.close()
     return df
Example #37
0
 def get(self, lowerupper):
     lower, upper = lowerupper.split(":")
     print lower, upper
     lower = str(int(lower) + 1)
     conn = get_db()
     sql = "SELECT * FROM signal WHERE ROWID >= %s  AND ROWID < %s" % (lower, upper)
     df = psql.read_frame(sql, conn)
     return jsonify(df.to_dict())
Example #38
0
def data_firm():
    query = '''
    select firm_name,website_url
    from investor_company_url
    where result_rank = 0;'''
    url_data = psql.read_frame(query, conn)
    url_json = url_data.to_json()
    url_json_load = json.loads(url_json)
    return json.dumps(url_json_load)
Example #39
0
    def _check_roundtrip(self, frame):
        sql.write_frame(frame, name='test_table', con=self.db)
        result = sql.read_frame("select * from test_table", self.db)

        # HACK!
        result.index = frame.index

        expected = frame
        tm.assert_frame_equal(result, expected)
Example #40
0
def data_firm():
    query = """
    select firm_name,website_url
    from investor_company_url
    where result_rank = 0;"""
    url_data = psql.read_frame(query, conn)
    url_json = url_data.to_json()
    url_json_load = json.loads(url_json)
    return json.dumps(url_json_load)
Example #41
0
    def _check_roundtrip(self, frame):
        sql.write_frame(frame, name="test_table", con=self.db)
        result = sql.read_frame("select * from test_table", self.db)

        # HACK!
        result.index = frame.index

        expected = frame
        tm.assert_frame_equal(result, expected)

        frame["txt"] = ["a"] * len(frame)
        frame2 = frame.copy()
        frame2["Idx"] = Index(range(len(frame2))) + 10
        sql.write_frame(frame2, name="test_table2", con=self.db)
        result = sql.read_frame("select * from test_table2", self.db, index_col="Idx")
        expected = frame.copy()
        expected.index = Index(range(len(frame2))) + 10
        tm.assert_frame_equal(expected, result)
Example #42
0
 def get(self, lowerupper):
     lower, upper = lowerupper.split(':')
     print lower, upper
     lower = str(int(lower) + 1)
     conn = get_db()
     sql = 'SELECT * FROM signal WHERE ROWID >= %s  AND ROWID < %s' % (
         lower, upper)
     df = psql.read_frame(sql, conn)
     return jsonify(df.to_dict())
Example #43
0
    def updateRangeInfo(self,seriesNames, logfile = None):
        ''' updates the range info in seriesList so that it matches what's in db.
        should be run before/after importing new series
        '''

        
        errortables = []
        
        if logfile !=None:
            logging.basicConfig(filename= logfile, filemode='w', level = logging.ERROR)
       
        self.connect()
        
        for name in seriesNames:
            
            logging.info("updateRangeInfo for {}".format(name))
            sqlRead = "SELECT Date FROM {0}".format(name)
            
            #Read series data range
            try:
                dates = psql.read_frame(sqlRead, 
                                       con = self.con
                                       )
            except Exception as e:
                errortables.append(name)
                logging.error("updateRangeInfo: Reading table, encountered error <<{0}>>".format(e))
                continue
               
               
            #Convert to datetime objects
            dates = dates.apply(pd.to_datetime)
            StartRange = dates.min().iloc[0] #still series object so have to get data
            EndRange = dates.max().iloc[0]
            
            #Construct sql update query
            sqlWrite = "UPDATE SeriesList SET StartRange = '{0}', ".format(StartRange)
            sqlWrite += "EndRange = '{0}' ".format(EndRange)
            sqlWrite += "WHERE SeriesName = '{0}';".format(name)
            
            #print sqlWrite
            
            cur = self.con.cursor()
            
            try:
                cur.execute(sqlWrite)
            
            except Exception as e:
                logging.error("updateRangeInfo: Error executing write dates, encountered error <<{0}>>".format(e))
                errortables.append(name)
                continue
            
            else:     
                self.con.commit()
                
            
        self.disconnect()
        return errortables
Example #44
0
def plot():
    consqlite = sqlite3.connect('d:\\DataMining\\Tesis\\DATASET SUBE\\base.db')
    consqlite.text_factory = str
    df = sql.read_frame(
        'SELECT  MINDISTANCIA from MINDISTANCIA WHERE LINEAMT = 114 AND RAMALMT = 361  AND RAMALGEO = "B" ',
        consqlite)

    df = filtroDesvioStd(df, 'MINDISTANCIA', 6)
    hist([val[0] for val in df.values], 100, (0, 200))

    #BOXPLOT
    consqliteBase = sqlite3.connect('base.db')
    consqliteBase.text_factory = str
    df = sql.read_frame(
        'SELECT A.*, B.AREAGEOGRAFICA FROM LINEASTRXGEO  A, (SELECT DISTINCT LINEA, RAMAL, AREAGEOGRAFICA FROM  LINEARAMALTRX) B WHERE A.LINEAMT = B.LINEA AND A.RAMALMT = B.RAMAL',
        consqliteBase)
    df = sql.read_frame('SELECT A.* FROM LINEASTRXGEO A ', consqliteBase)

    figure()
    boxplot(df['RMSE'], 0, 'gD', 0)
    boxplot(df['RMSE'], 0, '', 0)

    boxplot(
        df[df['AREAGEOGRAFICA'] == '1']['RMSE'],
        0,
    )

    #%pylab

    boxplot(df[df['AREAGEOGRAFICA'] == '12']['RMSE'].values, 0)
    boxplot(df[df['AREAGEOGRAFICA'] == '13']['RMSE'].values, 0)

    boxplot([
        df[df['AREAGEOGRAFICA'] == '1']['RMSE'],
        df[df['AREAGEOGRAFICA'] == '12']['RMSE'],
        df[df['AREAGEOGRAFICA'] == '13']['RMSE']
    ], 0, 'gx', 0)
    boxplot([
        df[df['AREAGEOGRAFICA'] == '1']['RMSE'],
        df[df['AREAGEOGRAFICA'] == '12']['RMSE'],
        df[df['AREAGEOGRAFICA'] == '13']['RMSE']
    ], 0, '')

    hist(df['RMSE'], 20)
Example #45
0
def analyze(sqlite_path):
    con = sql.connect(sqlite_path)
    df = pd_sql.read_frame(
        "SELECT user_id, artist, timestamp FROM tracks LIMIT 1000",
        con,
        index_col='timestamp')
    df.index = pd.to_datetime(df.index.values * 1e9)
    uac_df = df.groupby(['user_id', 'artist']).resample('M', how='count')
    #uac_df.reindex(pd.date_range(min(uac_df.index), max(uac_df.index)))
    print uac_df.head(10)
Example #46
0
    def _check_roundtrip(self, frame):
        sql.write_frame(frame, name='test_table', con=self.db)
        result = sql.read_frame("select * from test_table", self.db)

        # HACK!
        result.index = frame.index

        expected = frame
        tm.assert_frame_equal(result, expected)

        frame['txt'] = ['a'] * len(frame)
        frame2 = frame.copy()
        frame2['Idx'] = Index(range(len(frame2))) + 10
        sql.write_frame(frame2, name='test_table2', con=self.db)
        result = sql.read_frame("select * from test_table2",
                                self.db,
                                index_col='Idx')
        expected = frame.copy()
        expected.index = Index(range(len(frame2))) + 10
        tm.assert_frame_equal(expected, result)
Example #47
0
def get_raw_count_for_all(table, valid_criteria):
    query="""
    SELECT 
            count(*) as count
    FROM    
            package, %s t inner join unit_location ul on ul.id = t.location_id 
    WHERE 
            ul.package_id = package.id and 
            target like '%%ispmon.samknows.%%.measurement-lab.org' 
            %s ; """ % (table, valid_criteria) 
    return psql.read_frame(query, db)
Example #48
0
def get_raw_tests_for_host(hostname, table, column, valid_criteria):
    query="""
    SELECT 
            dtime as date, date(dtime) as day, ul.package_id as package_id, %s
    FROM    
            package, %s t inner join unit_location ul on ul.id = t.location_id 
    WHERE 
            ul.package_id = package.id and 
            target like '%%ispmon.samknows.%s.measurement-lab.org' 
            %s ; """ % (column, table, hostname, valid_criteria) 
    return psql.read_frame(query, db)
Example #49
0
def get_development_projects():
    conn_string = orca.get_injectable('conn_string')
    if len(conn_string) == 0:
        print 'A "conn_string" injectable must be registered and populated. Skipping export-to-Urban-Canvas.'
        return None
    conn = psycopg2.connect(conn_string)
    cur = conn.cursor()

    print "Loading committed development projects table"
    devproj_query = "select id, placetype_id as building_type_id, duration, buildings_number, average_floors as stories, sqft as non_residential_sqft, sqft_unit as sqft_per_unit, units as residential_units, Name as name, start_date from developmentprojects where committed = 'TRUE';"
    devproj = sql.read_frame(devproj_query, conn)
    devproj['year_built'] = devproj.start_date.astype('object').astype('str')
    devproj.year_built = devproj.year_built.str.slice(start=0, stop=4)
    devproj.year_built = devproj.year_built.astype('int')

    print "Loading development project parcels"
    dp_pcl_query = "select developmentprojects_parcels.development_project, developmentprojects_parcels.parcel_id, parcel.parcel_acres from developmentprojects_parcels, parcel where developmentprojects_parcels.parcel_id = parcel.parcel_id;"
    dp_pcl = sql.read_frame(dp_pcl_query, conn)
    devproject_parcel_ids = dp_pcl.groupby(
        'development_project').parcel_id.max().reset_index(
        )  ##In future, use the parcel_acres field on this tbl too

    scheduled_development_events = pd.merge(devproject_parcel_ids,
                                            devproj,
                                            left_on='development_project',
                                            right_on='id')
    scheduled_development_events = scheduled_development_events.rename(
        columns={
            'development_project': 'scheduled_development_event_id',
            'building_type_id': 'development_type_id'
        })
    scheduled_development_events = scheduled_development_events[[
        'scheduled_development_event_id', 'year_built', 'development_type_id',
        'stories', u'non_residential_sqft', 'sqft_per_unit',
        'residential_units', 'parcel_id'
    ]]
    for col in scheduled_development_events:
        scheduled_development_events[col] = scheduled_development_events[
            col].astype('int')

    return scheduled_development_events
Example #50
0
def main():
    con = MySQLdb.connect(host="bigblasta.chiim1n4uxwu.eu-central-1.rds.amazonaws.com", user="******", passwd="Jo27051980", db="bigblasta")
    cursor = con.cursor()
    df = sql.read_frame('select t2.tid, t2.aid from (select tid from start_tracks order by rand() limit 1000)t1 inner join tracks t2 on t1.tid = t2.tid group by 1,2', con)
    subset = df[['tid', 'aid']]
    start_tracks = [tuple(x) for x in subset.values]
    if len(start_tracks)>0:
        # create a client object with your app credentials
        from multiprocessing.dummy import Pool as ThreadPool 
        pool = ThreadPool(1) 
        pool.map(processTrackEcho, start_tracks)
        pool.close()
Example #51
0
def _get_avg_data(apsimDbConn, pointDailyData, harvestDates, sowDate):
    '''
    Determines seasonal averages for data.
    
    Parameters
    ----------
    apsimDbConn : sqlite connection object
        connection to database
    pointDailyData : pandas dataframe
        daily data values, indexed by date
    harvestDates : pandas dataframe
        string date of harvesting, indexed by year
    sowDate : string
        date of sowing (dd-mmm)
        
    Returns
    -------
    Dataframe of yearly average data (rain, mint, maxt, radn, and irr_fasw).
    '''
    # get unique years from data
    years = np.unique(pointDailyData.index.year)
    
    # convert sowDate to correct format
    sowDate = strptime(sowDate,'%d-%b')
    
    # read data from the outputFields table
    with apsimDbConn:
        outputFields = psql.read_frame("SELECT * FROM outputFields;", apsimDbConn)
    outputFields = list(outputFields['name'])
    outputFields.remove('date')
    outputFields.remove('yield')
    
    yearlyAvgData = pandas.DataFrame({})
    for field in outputFields:
        dataAvgs = {}
        for year in years:
            harvestDate = harvestDates[year]
            
            # check if harvestDate is a string
            if type(harvestDate) == type(''):
                rng = pandas.date_range('{0}/{1}/{2}'.format(sowDate.tm_mon, sowDate.tm_mday, year), harvestDate)
                
                # get the avg values and add to dataAvgs dictionary
                pointDailyDataMean = pointDailyData[field].ix[rng].mean()
                dataAvgs[year] = pointDailyDataMean
            else: # if harvestDate is not a string, set as NaN
                dataAvgs[year] = np.nan

        #print dataAvgs
        yearlyAvgData[field] = pandas.Series(dataAvgs)
        #print yearlyAvgData[field].head()
                                   
    return yearlyAvgData
Example #52
0
    def _check_roundtrip(self, frame):
        sql.write_frame(frame, name='test_table', con=self.db)
        result = sql.read_frame("select * from test_table", self.db)

        # HACK! Change this once indexes are handled properly.
        result.index = frame.index

        expected = frame
        tm.assert_frame_equal(result, expected)

        frame['txt'] = ['a'] * len(frame)
        frame2 = frame.copy()
        frame2['Idx'] = Index(lrange(len(frame2))) + 10
        sql.write_frame(frame2, name='test_table2', con=self.db)
        result = sql.read_frame("select * from test_table2", self.db,
                                index_col='Idx')
        expected = frame.copy()
        expected.index = Index(lrange(len(frame2))) + 10
        expected.index.name = 'Idx'
        print(expected.index.names)
        print(result.index.names)
        tm.assert_frame_equal(expected, result)