def _load_chat(self):
    """Return df of iMessage chats."""
    # Create sql engine for pandas connection.
    im_engine = create_engine('sqlite:////Users/{u}/Library/Messages/chat.db'.format(u=self.username))
    # Load tables into DataFrames.
    handle_cols = ['ROWID', 'id']
    df_handle = pd.read_sql_table(table_name='handle', con=im_engine, columns=handle_cols)
    df_chat_handle_join = pd.read_sql_table(table_name='chat_handle_join', con=im_engine)
    chat_cols = ['ROWID', 'display_name', 'chat_identifier']
    df_chat = pd.read_sql_table(table_name='chat', con=im_engine, columns=chat_cols)
    df_chat_msg_join = pd.read_sql_table(table_name='chat_message_join', con=im_engine)
    msg_cols = ['ROWID', 'text', 'date', 'is_emote', 'is_from_me', 'handle_id']
    df_msg = pd.read_sql_table(table_name='message', con=im_engine, columns=msg_cols)
    # Join iMessage tables into single DataFrame.
    ## Remove unnecessary fields before each join.
    df = df_handle.merge(df_chat_handle_join, left_on='ROWID', right_on='handle_id', how='left')
    df.drop(labels=['ROWID'], axis=1, inplace=True)
    df = df.merge(df_chat, left_on='chat_id', right_on='ROWID', how='left')
    df.drop(labels=['ROWID'], axis=1, inplace=True)
    df = df.merge(df_chat_msg_join, on='chat_id', how='left')
    df.drop(labels=['chat_id'], axis=1, inplace=True)
    df = df.merge(df_msg, left_on=['message_id', 'handle_id'],
                  right_on=['ROWID', 'handle_id'], how='left')
    df.drop(labels=['ROWID', 'message_id'], axis=1, inplace=True)
    # Parse timestamp.
    ## Add 978307200 (seconds between 1970-01-01 and 2001-01-01) to convert
    ## Apple NSDate to Unix epoch time.
    ns_conversion = 978307200
    _date_func = lambda x: dt.fromtimestamp(x + ns_conversion) if not np.isnan(x) else dt.now()
    df.date = df.date.apply(_date_func)
    return df
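# A minimal host for the method above: the body assumes a class carrying a
# `username` attribute plus these imports. The class name `ChatLoader` is an
# assumption for illustration only.
from datetime import datetime as dt

import numpy as np
import pandas as pd
from sqlalchemy import create_engine


class ChatLoader:
    def __init__(self, username):
        self.username = username

    _load_chat = _load_chat  # bind the function above as a method


if __name__ == '__main__':
    chats = ChatLoader('alice')._load_chat()
    print(chats.head())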
def rank(dataset, force_overwrite=False):
    # name of the result table
    res_tb = dataset + '_result'
    # read even when cached; column names are extracted from X
    tb = pd.read_sql_table(dataset, db, index_col='ID')
    X = tb.iloc[:, :-1]
    y = tb.iloc[:, -1]
    # check if it is cached
    if res_tb in db.table_names() and not force_overwrite:
        # yes, get it
        res = pd.read_sql_table(res_tb, db, index_col='index')
    else:
        # no, compute it
        # remove low-variance columns
        low_var_cols = X.columns[X.var() < 1e-5]
        X.drop(low_var_cols, axis=1, inplace=True)
        # rank
        (rank1, R2) = rfe_with_grid_search(
            X.values, y, RandomForestRegressor(n_jobs=-1),
            [{'n_estimators': [5, 10, 30], 'max_features': [1.0]}])
        (rank2, scores) = lassocv_n_random_lasso(X, y)
        res = pd.DataFrame(
            np.array([X.columns[rank1], R2, X.columns[rank2], scores]).T,
            columns=['rfe_random_forest', 'R2', 'randomized_lasso', 'scores'])
        res.to_sql(res_tb, db, if_exists='replace')
    return (res['rfe_random_forest'], res['R2'],
            res['randomized_lasso'], res['scores'])
def get_balance():
    accounts = pd.read_sql_table('account', db.engine)
    transactions = pd.read_sql_table('transaction', db.engine, columns=['account', 'amount'])
    scheduled_transactions = pd.read_sql_table('scheduled_transaction', db.engine)
    transactions = transactions.rename(columns={'account': 'name'})
    transactions = transactions.groupby('name', as_index=False).sum()
    accounts['amount'] = accounts['reconciled_balance']
    for name in transactions.name:
        # .loc replaces the long-removed .ix indexer
        accounts.loc[accounts['name'] == name, 'amount'] += \
            transactions.loc[transactions.name == name, 'amount'].iloc[-1]
    # taking scheduled transactions into account
    accounts['end_of_month_amount'] = accounts['amount']
    for idx, operation in scheduled_transactions.iterrows():
        i = 0
        today = datetime.datetime.now()
        last_day_of_month = today + relativedelta(day=1, months=+1, days=-1)
        while operation.next_occurence \
                + relativedelta(**{operation.every_type: i * operation.every_nb}) \
                <= last_day_of_month:
            i += 1
        accounts.loc[accounts['name'] == operation.account, 'end_of_month_amount'] += \
            operation.amount * i
    return accounts
def __init__(self, path=None):
    if path is None:
        path = "sqlite:///" + os.path.join(os.environ["HOME"], "tmp", "keras_logs.db")
    db_path = path.replace("sqlite:///", "")
    try:
        self.logs = pd.read_sql_table("log", path)
        self.runs = (pd.read_sql_table("run", path)
                     .rename(columns={"id": "runid"})
                     .sort_values("runid", ascending=False))
        self.df = self.logs.merge(self.runs)
    except ValueError:
        self.runs = pd.DataFrame({"runid": [], "comment": [], "user": []})
def getAllData(poi_pca, con):
    order_info = pd.read_sql_table('compressed_districts', con=con)
    weather_info = pd.read_sql_table('weather_info', con=con)
    cluster_map = pd.read_sql_table('district_info', con=con)
    traffic_info = pd.read_sql_table('traffic_info', con=con)
    poi_info = pd.read_sql_table('poi_info', con=con)
    fixed_weather = dw.fixWeatherData(weather_info)
    fixed_traffic = dw.fixTrafficData(traffic_info, cluster_map)
    fixed_poi, expected_var = cp.compressPoiData(poi_info.fillna(0), poi_pca)
    return order_info, fixed_poi, fixed_weather, fixed_traffic
def get_table_df(self, table, columns=None):
    """
    :param table: string type, db_name.table_name
    :param columns: list type with string values, like: ['acc_name', 'initial']
    :return: DataFrame type
    """
    # The original branches were swapped and passed `columns` positionally,
    # where read_sql_table expects `schema`; pass it by keyword instead.
    if columns:
        return pd.read_sql_table(table, self.engine, columns=columns)
    else:
        return pd.read_sql_table(table, self.engine)
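# A minimal usage sketch for the accessor above; the wrapper class name `Db`
# and the connection string are assumptions for illustration.
from sqlalchemy import create_engine


class Db:
    def __init__(self, url):
        self.engine = create_engine(url)

    get_table_df = get_table_df  # bind the function above as a method


db = Db('sqlite:///accounts.db')
full = db.get_table_df('account')                               # all columns
partial = db.get_table_df('account', ['acc_name', 'initial'])   # selected columns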
def ShowGS(com1, com2, com3):
    df2 = Codes[Codes['code'].isin([com1])]
    print df2
    if com2 == 'syl30':
        tname = 'b' + com1
        if me.IsTableExist(tname, G_DBengine) == False:
            print 'No table ....%s' % tname
            return
        df = pd.read_sql_table(tname, G_DBengine)
        if df.index.size > 250:
            df1 = df.drop(range(df.index.size - 250))
        else:
            df1 = df  # keep df1 defined for tables shorter than 250 rows
        me.PinghuaDF(df1, md.BI_syl30 + 1, 5)
        plt.title(com1 + ' ' + com2 + ' ' + str(max(df1['date'])))
        plt.fill_between(df1.index, df1['syl30'], 0, where=df1['syl30'] > 0, facecolor='red')
        plt.fill_between(df1.index, df1['syl30'], 0, where=df1['syl30'] <= 0, facecolor='green')
    elif com2 == 'syl250':
        tname = 'b' + com1
        if me.IsTableExist(tname, G_DBengine) == False:
            print 'No table ....%s' % tname
            return
        df = pd.read_sql_table(tname, G_DBengine)
        me.PinghuaDF(df, md.BI_syl250 + 1, 30)
        plt.fill_between(df.index, df['syl250'], 0, where=df['syl250'] > 0, facecolor='red')
        plt.fill_between(df.index, df['syl250'], 0, where=df['syl250'] <= 0, facecolor='green')
        plt.title(com1 + ' ' + com2 + ' ' + str(max(df['date'])))
        View_10X(plt, df, '123')
    elif com2 == 'hb':
        tname = 'f' + com1
        if me.IsTableExist(tname, G_DBengine) == False:
            print 'No table ....%s' % tname
            return
        df = pd.read_sql_table('f' + com1, G_DBengine)
        df[1:df.index.size][['sjsrhb', 'sjlrhb']].plot(kind='bar', color={'red', 'green'})
        df[1:df.index.size]['nhgdqyl'].plot(color='blue', secondary_y=True, linewidth=LW)
        plt.title(com1 + ' ' + com2 + ' ' + str(df.loc[df.index.size - 1, 'year']) +
                  ' ' + str(df.loc[df.index.size - 1, 'season']))
    elif com2 == 'sr':
        tname = 'f' + com1
        if me.IsTableExist(tname, G_DBengine) == False:
            print 'No table ....%s' % tname
            return
        df = pd.read_sql_table('f' + com1, G_DBengine)
        df[1:df.index.size]['sjsr'].plot(kind='bar', color='green')
        df[1:df.index.size]['sjlr'].plot(color='red', secondary_y=True, linewidth=LW)
        plt.title(com1 + ' ' + com2 + ' ' + str(df.loc[df.index.size - 1, 'year']) +
                  ' ' + str(df.loc[df.index.size - 1, 'season']))
    else:
        print '[Error] input error ...'
        return
    plt.show()
    plt.close()
def read_data():
    """Read data from MySQL."""
    # open connection and read tables
    my_db = sa.engine.url.URL(drivername='mysql',
                              database='openpharma_db',
                              query={'read_default_file': '~/.my.cnf'})
    engine = sa.create_engine(name_or_url=my_db)
    df_class = pd.read_sql_table(table_name='classifier_tb', con=engine)
    df_trial = pd.read_sql_table(table_name='trials_tb', con=engine)
    return df_class, df_trial
def fromOpSimDB(cls, dbname, subset='combined'):
    """
    Class Method to instantiate this from an OpSim sqlite database output

    Parameters
    ----------
    dbname : path or sqlite connection string to the OpSim database
    subset : string, one of the allowed subsets, defaults to 'combined'
    """
    allowed_subsets = cls.get_allowed_subsets()
    subset = subset.lower()
    if subset not in allowed_subsets:
        raise NotImplementedError('subset {} not implemented'.format(subset))
    if not dbname.startswith('sqlite'):
        dbname = 'sqlite:///' + dbname
    print(' reading from database {}'.format(dbname))
    engine = create_engine(dbname, echo=False)

    # Read the proposal table to find out which propID corresponds to
    # which proposal.
    proposals = pd.read_sql_table('Proposal', con=engine)
    propDict = cls.get_propIDDict(proposals)

    # Do the actual sql queries or table reads
    if subset in ['_all', 'unique_all']:
        # In this case read everything (ie. table read)
        summary = pd.read_sql_table('Summary', con=engine)
        # _all will be used only to write out other serialized versions
        # of OpSim. Do not drop duplicates, so that different subsets can
        # be constructed from the same hdf file
        if subset == 'unique_all':
            summary.drop_duplicates(subset='obsHistID', inplace=True)
        summary.set_index('obsHistID', inplace=True)
        return cls(propIDDict=propDict, summary=summary, proposalTable=proposals)
    else:
        sql_query = 'SELECT * FROM Summary WHERE PROPID'
        if subset == 'ddf':
            sql_query += ' == {0}'.format(propDict['ddf'])
        if subset == 'wfd':
            sql_query += ' == {0}'.format(propDict['wfd'])
        if subset == 'combined':
            # SQL membership tests use IN (..), not the Python-style in [..]
            sql_query += ' IN ({0}, {1})'.format(propDict['wfd'], propDict['ddf'])
        # Read the summary table
        summary = pd.read_sql_query(sql_query, con=engine)
        summary.drop_duplicates(subset='obsHistID', inplace=True)
        summary.set_index('obsHistID', inplace=True)
        return cls(propIDDict=propDict, summary=summary, proposalTable=proposals)
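# Hedged usage sketch for the constructor above, assuming it is bound as a
# classmethod on a class here called OpSimOutput (class and attribute names
# are assumptions).
opsim = OpSimOutput.fromOpSimDB('enigma_1189.db', subset='ddf')
print(opsim.summary.head())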
def read_data():
    """Read data from MySQL."""
    print "Reading data..."
    # open connection and read tables
    my_db = sa.engine.url.URL(drivername='mysql',
                              database='openpharma_db',
                              query={'read_default_file': '~/.my.cnf'})
    engine = sa.create_engine(name_or_url=my_db)
    df_compare = pd.read_sql_table(table_name='compare_tb', con=engine)
    df_packages = pd.read_sql_table(table_name='packages_tb', con=engine)
    return df_compare, df_packages
def unlock_form(project_name, arm_name, event_descrip, form_name, engine, subject_id=None):
    """
    Unlock a given form by removing records from the locking table.

    :param project_name: str
    :param arm_name: str
    :param event_descrip: str
    :param form_name: str
    :param engine: `sqlalchemy.Engine`
    :param subject_id: str
    :return: bool, True if any locked records were removed
    """
    # get ids needed for unlocking
    project_id = get_project_id(project_name, engine)
    arm_id = get_arm_id(arm_name, project_id, engine)
    event_id = get_event_id(event_descrip, arm_id, engine)
    # get a list of all the locked records and filter for records to remove
    locked_records = pd.read_sql_table('redcap_locking_data', engine)
    locked_forms = locked_records[(locked_records.project_id == project_id) &
                                  (locked_records.event_id == event_id) &
                                  (locked_records.form_name == form_name)]
    if subject_id:
        locked_forms = locked_forms[(locked_forms.record == subject_id)]
    # generate the list of ids to drop and remove from db table
    global locked_list
    locked_list = ', '.join([str(i) for i in locked_forms.ld_id.values.tolist()])
    if locked_list:
        sql = 'DELETE FROM redcap_locking_data ' \
              'WHERE redcap_locking_data.ld_id IN ({0});'.format(locked_list)
        execute(sql, engine)
        return True
    else:
        return False
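# Hedged usage sketch for unlock_form, assuming a live connection to the
# REDCap MySQL schema; project/arm/event/form names are illustrative.
from sqlalchemy import create_engine

engine = create_engine('mysql+pymysql://user:pass@localhost/redcap')
removed = unlock_form('my_project', 'Arm 1', 'Baseline', 'demographics',
                      engine, subject_id='SUBJ-001')
print('records unlocked' if removed else 'nothing was locked')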
def plot_time_mem_picard_val(pre_post_fastqc_readcount_match_df, data_engine, logger):
    time_mem_picard_validatesamfile_df = pd.read_sql_table('time_mem_picard_validatesamfile', data_engine)
    df = pd.merge(pre_post_fastqc_readcount_match_df, time_mem_picard_validatesamfile_df,
                  how='inner', on='uuid')
    df_pre = df
    df_pos = df[df['bam_path'].str.contains('realn')]
    ndf = df[['pre_count', 'wall_clock']]
    # convert_objects() was removed from pandas; coerce to numeric explicitly
    ndf = ndf.apply(pd.to_numeric, errors='coerce')
    ndf['wall_clock'] = ndf['wall_clock'] / 3600
    ndf['pre_count'] = ndf['pre_count'] / 1000000
    ax = ndf.plot(x='pre_count', y='wall_clock', kind='scatter')
    yrange_max = math.ceil(max(ndf['wall_clock']))
    xmod = ndf['pre_count'] / 500
    xmod_ceil = math.ceil(max(xmod))
    xrange_max = xmod_ceil * 500
    ax.set_title('picard ValidateSamFile')
    ax.set_xlim([0, xrange_max])
    ax.set_ylim([0, yrange_max])
    ax.set_xlabel('readcount (millions)')
    ax.set_ylabel('run time (hr)')
    fig = ax.get_figure()
    fig.savefig('picard_validate_readcount_preharmonize.png', dpi=600)
    fig.savefig('picard_validate_readcount_postharmonize.png', dpi=600)
    fig.savefig('picard_validate_filesize_preharmonize.png', dpi=600)
    fig.savefig('picard_validate_filesize_postharmonize.png', dpi=600)
def download_all_stock_history_k_line():
    print "download all stock k-line start"
    try:
        if cm.DB_WAY == "csv":
            df = pd.DataFrame.from_csv(cm.DownloadDir + cm.TABLE_STOCKS_BASIC + ".csv")
            # se = df.loc[int(code)]
            # se = df.ix[code]
            pool = ThreadPool(processes=20)
            pool.map(download_stock_kline, df.index)
            pool.close()
            pool.join()
        elif cm.DB_WAY == "redis":
            codes = r.smembers(cm.INDEX_STOCK_BASIC)
            # codes = r.lrange(cm.INDEX_STOCK_BASIC, 0, -1)
            pool = ThreadPool(processes=20)
            pool.map(download_stock_kline_to_redis, codes)
            pool.close()
            pool.join()
        elif cm.DB_WAY == "sqlite":
            df = pd.read_sql_table(cm.INDEX_STOCK_BASIC, engine)
            codes = df[cm.KEY_CODE].get_values()
            # codes = r.lrange(cm.INDEX_STOCK_BASIC, 0, -1)
            pool = ThreadPool(processes=2)
            pool.map(download_stock_kline_to_sqlite, codes)
            pool.close()
            pool.join()
    except Exception as e:
        print str(e)
    print "download all stock k-line finish"
def break_low(self, date):
    '''
    Select stocks that made a new one-year low.
    :param date: date string for a given day, e.g. '2017-11-11'
    :return:
    '''
    # cmd = 'select * from `{}`'.format(date)
    df = pd.read_sql_table(date, daily_engine, index_col='index')  # NOTE: one of these index columns needs to be dropped
    low_db = get_mysql_conn('db_selection')
    low_cursor = low_db.cursor()
    for i in range(len(df)):
        code = df.loc[i]['code']
        cur_low = df.loc[i]['low']
        mins_date, mins = self.get_lowest(code, '2017', date)
        if not mins_date:
            continue
        if mins and float(cur_low) <= float(mins) and float(cur_low) != 0.0:
            print code,
            print df.loc[i]['name']
            print 'year min {} at {}'.format(mins, mins_date)
            print 'current min', cur_low
            create_cmd = 'create table if not exists break_low' \
                         '(`index` int primary key auto_increment,datetime datetime,code text,name text,' \
                         'low_price float,last_price float, last_price_date datetime);'
            low_cursor.execute(create_cmd)
            insert_cmd = 'insert into break_low (datetime,code,name,low_price,last_price,last_price_date) ' \
                         'values (%s,%s,%s,%s,%s,%s);'
            insert_data = (date, code, df.loc[i]['name'], cur_low, mins, mins_date)
            low_cursor.execute(insert_cmd, insert_data)
    low_db.commit()
def InitializeMonthlyTable():
    disk_engine = create_engine('mysql://*****:*****@quantico.chgivxnnhpn3.us-west-2.rds.amazonaws.com/Quantico')
    df_data = pd.read_sql_table('data', disk_engine)
    allclients = sorted(list(pd.unique(df_data.client.ravel())))
    client_job_status_dict = {}
    for client in allclients:
        client_job_status_dict[client] = {'success': 0, 'failure': 0, 'partial': 0}
    start = dt.datetime(2016, 2, 1, 18, 0, 0)
    days = calendar.monthrange(2016, 2)[1]
    # start = dt.datetime(2016,1,18,16,0,0)
    # days = 4
    global table
    # un_sorted_table = {}
    for day in range(1, days + 1):
        table[start] = copy.deepcopy(client_job_status_dict)
        for client in allclients:
            s = time.mktime(start.timetuple())
            t = start + dt.timedelta(days=1)
            e = time.mktime(t.timetuple())
            if len(df_data.query('started > {0} and started < {1} and client == "{2}" and status == {3}'.format(s, e, client, 0))) == 0:
                table[start][client]['success'] = 0
                table[start][client]['failure'] = 0
                table[start][client]['partial'] = 0
            else:
                table[start][client]['success'] = len(df_data[(df_data['started'] > s) & (df_data['started'] < e) &
                                                              (df_data['client'] == client) & (df_data['status'] == 0)].index)
                table[start][client]['failure'] = len(df_data[(df_data['started'] > s) & (df_data['started'] < e) &
                                                              (df_data['client'] == client) & (df_data['status'] > 1)].index)
                table[start][client]['partial'] = len(df_data[(df_data['started'] > s) & (df_data['started'] < e) &
                                                              (df_data['client'] == client) & (df_data['status'] == 1)].index)
        start += dt.timedelta(days=1)
def load_catalog(file_info, load_log=False):
    """
    Load a catalog file. For now this exclusively uses an alchemy connection
    to a DB but in the future this will have methods for various file types,
    or at the very least an export function. The biggest complication is that
    in addition to the table information there is also metadata (which pandas
    currently does not support) and a log of changes to a catalog.
    """
    cid = file_info['file_settings']['table']
    connect_str = file_info['filepath']
    engine = create_engine(connect_str)  # connects to the db
    Base.metadata.bind = engine  # binds the metadata to the engine
    if cid not in Base.metadata.tables.keys():
        raise astrotoyz.core.AstroToyzError("Catalog not found in database")
    dataframe = pandas.read_sql_table(cid, engine)
    DBSession = sessionmaker(bind=engine)
    session = DBSession()
    meta = session.query(CatalogMeta).filter(CatalogMeta.cid == cid).first()
    if meta is None:
        raise astrotoyz.core.AstroToyzError("Could not find catalog meta data")
    settings = json.loads(meta.settings)
    if load_log:
        log = pandas.read_sql_query(
            "SELECT * FROM log WHERE cid='{0}'".format(cid), engine)
    else:
        log = None
    catalog = Catalog(cid, file_info, name=meta.name, log=log, data=dataframe)
    return catalog
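# Hedged usage sketch: load_catalog expects a file_info dict with at least
# the two keys read above (values here are illustrative); it also relies on
# the module's ORM setup (Base, CatalogMeta, Catalog) being in place.
file_info = {
    'filepath': 'sqlite:///catalogs.db',
    'file_settings': {'table': 'my_catalog'},
}
catalog = load_catalog(file_info, load_log=True)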
def fin_read_hy(hy):
    is_first = True
    for i in range(Codes.index.size):
        code = Codes.loc[i, 'code']
        t_name = 'f' + code
        if me.IsTableExist(t_name, G_DBengine) == False:
            continue
        if is_first:
            fin = pd.read_sql_table(t_name, G_DBengine)
            is_first = False
        else:
            df = pd.read_sql_table(t_name, G_DBengine)
            fin = fin.append(df)
        print '...fin_read_hy:' + hy + '.......[%d of %d]' % (i, Codes.index.size)
    return fin
def fin_com_hy(hycode):
    global Fin
    hy_fin = pd.read_sql_table('f600036', G_DBengine)
    del hy_fin['level_0']
    hy_fin['code'] = hycode
    hy_fin['name'] = hycode
    hy_fin['jzc'] = 0.0
    hy_fin['sjsr'] = 0.0
    hy_fin['sjlr'] = 0.0
    hy_fin['sjsrhb'] = 0.0
    hy_fin['sjlrhb'] = 0.0
    i = 0
    for y in range(2006, 2017):
        for s in range(1, 5):
            df1 = Fin[Fin.year == y]
            df2 = df1[df1.season == s]
            if df2.index.size == 0:
                i = i + 1
                continue
            d_sum = df2.sum()
            if i < hy_fin.index.size:
                hy_fin.iat[i, G_jzc] = d_sum.jzc
                hy_fin.iat[i, G_sjsr] = d_sum.sjsr
                hy_fin.iat[i, G_sjlr] = d_sum.sjlr
            i = i + 1
    for i in range(3, hy_fin.index.size):
        hy_fin.iat[i, G_sjsrhb] = hy_fin.iat[i, G_sjsr] / (hy_fin.iat[i, G_jzc] + hy_fin.iat[i - 1, G_jzc] +
                                                           hy_fin.iat[i - 2, G_jzc] + hy_fin.iat[i - 3, G_jzc]) * 400
        hy_fin.iat[i, G_sjlrhb] = hy_fin.iat[i, G_sjlr] / (hy_fin.iat[i, G_jzc] + hy_fin.iat[i - 1, G_jzc] +
                                                           hy_fin.iat[i - 2, G_jzc] + hy_fin.iat[i - 3, G_jzc]) * 400
    return hy_fin
def uploadFX(rates):
    table = rates[0]
    today = rates[1]
    try:
        df = pd.read_sql_table("fxrates", engine, parse_dates="FXDate")
    except Exception:
        df = pd.read_csv("Common/FX rates.csv", parse_dates=["FXDate"])
    max_date = df["FXDate"].max().date()
    if max_date < last_date:
        df_newdates = update_dates(max_date)
        df = df.append(df_newdates)
        # DataFrame.sort() was removed from pandas; use sort_values()
        df = df.sort_values(["FXDate"], ascending=False)
    df.loc[df.loc[:, "FXDate"] == table[0][0], "Rate"] = table[0][1]
    for i in range(len(table)):
        if i == 0 and today == datetime.date.today():
            df.loc[df.loc[:, "FXDate"] >= table[i][0], "Rate"] = table[i][1]
        else:
            df.loc[df.loc[:, "FXDate"] == table[i][0], "Rate"] = table[i][1]
    df = df.iloc[:, 1:]
    df.to_csv("Common/FX rates.csv", index=False)
def get_mysql_table_records(self, table_name, project_name, arm_name, event_descrip,
                            name_of_form=None, subject_id=None):
    """
    Get a dataframe of forms for a specific event

    :param project_name: str
    :param arm_name: str
    :param event_descrip: str
    :return: `pandas.DataFrame`
    """
    project_id = self.get_mysql_project_id(project_name)
    if not project_id:
        return pd.DataFrame()
    arm_id = self.get_mysql_arm_id(arm_name, project_id)
    event_id = self.get_mysql_event_id(event_descrip, arm_id)
    table_records = pd.read_sql_table(table_name, self.api['redcap_mysql_db'])
    table_forms = table_records[(table_records.project_id == project_id) &
                                (table_records.event_id == event_id)]
    if name_of_form:
        table_forms = table_forms[table_forms.form_name == name_of_form]
    if subject_id:
        table_forms = table_forms[table_forms.record == subject_id]
    return table_forms
def download_all_stock_history_k_line():
    print 'download all stock k-line start'
    try:
        if DB_WAY == 'csv':
            df = pd.DataFrame.from_csv(DownloadDir + INDEX_STOCK_BASIC + '.csv')
            # se = df.loc[int(code)]
            # se = df.ix[code]
            pool = ThreadPool(processes=20)
            pool.map(download_stock_kline_csv, df.index)
            pool.close()
            pool.join()
        elif DB_WAY == 'redis':
            codes = r.smembers(INDEX_STOCK_BASIC)
            # codes = r.lrange(INDEX_STOCK_BASIC, 0, -1)
            pool = ThreadPool(processes=20)
            pool.map(download_stock_kline_to_redis, codes)
            pool.close()
            pool.join()
        elif DB_WAY == 'mysql':
            df = pd.read_sql_table(INDEX_STOCK_BASIC, engine)
            codes = df[KEY_CODE].get_values()
            # codes = r.lrange(INDEX_STOCK_BASIC, 0, -1)
            pool = ThreadPool(processes=2)
            pool.map(download_stock_kline_to_sql, codes)
            pool.close()
            pool.join()
    except Exception as e:
        print str(e)
    print 'download all stock k-line finish'
def _xiayingxian(self, row, ratio):
    '''
    Lower-shadow (xiayingxian) candlestick logic.
    ratio: length ratio of the lower shadow; the larger the number, the longer the shadow
    row: a pandas Series
    '''
    open_p = float(row['open'])
    closed = float(row['close'])
    low = float(row['low'])
    high = float(row['high'])
    p = min(closed, open_p)
    try:
        diff = (p - low) * 1.00 / (high - low)
        diff = round(diff, 3)
    except ZeroDivisionError:
        diff = 0
    if diff > ratio:
        xiayinxian_engine = get_engine('db_selection')
        date, code, name, ocupy_ration, standards = row['datetime'], row['code'], row['name'], diff, ratio
        df = pd.DataFrame({'datetime': [date], 'code': [code], 'name': [name],
                           'ocupy_ration': [ocupy_ration], 'standards': [standards]})
        try:
            df1 = pd.read_sql_table('xiayingxian', xiayinxian_engine, index_col='index')
            df = pd.concat([df1, df])
        except Exception as e:
            print(e)
            # return None
        df = df.reset_index(drop=True)
        df.to_sql('xiayingxian', xiayinxian_engine, if_exists='replace')
    return row
def ShowMoney(com1, com2, com3):
    if True:
        tname = 'money'
        if me.IsTableExist(tname, G_DBengine) == False:
            print 'No table ....%s' % tname
            return
        df = pd.read_sql_table(tname, G_DBengine)
        df = df.sort_values('num', ascending=False)
        me.PinghuaDF(df, 18, 5)
        me.PinghuaDF(df, 19, 5)
        me.PinghuaDF(df, 20, 5)
        df[['fm2', 'fm1']].plot(linewidth=LW)
        df['m1dm2'].plot(color='red', secondary_y=True, linewidth=LW)
        plt.title(com1 + ' ' + com2 + ' ' + str(max(df['month'])))
        View_10X(plt, df, '321')
        # plt.xticks(range(0, df.index.size, df.index.size/10))
        # ax = plt.gca()
        # size = df.index.size
        # d_size = size / 9
        # ax.set_xticklabels([df.iat[size-1,1], df.iat[size-1-d_size,1], df.iat[size-1-d_size*2,1],
        #                     df.iat[size-1-d_size*3,1], df.iat[size-1-d_size*4,1], df.iat[size-1-d_size*5,1],
        #                     df.iat[size-1-d_size*6,1], df.iat[size-1-d_size*7,1], df.iat[size-1-d_size*8,1],
        #                     df.iat[0,1]])
    plt.show()
    plt.close()
def _get_report(only_A, table, columns=None, col='截止日期'):
    """
    Fetch financial report data,
    using the announcement dates from the income statement.
    """
    engine = get_engine('dataBrowse')
    df = pd.read_sql_table(table, engine, columns=columns)
    if only_A:
        df = df[~df.证券代码.str.startswith('2')]
        df = df[~df.证券代码.str.startswith('9')]
    # df.drop(to_drop, axis=1, inplace=True, errors='ignore')
    asof_dates = _financial_report_announcement_date()
    keys = ['证券代码', '截止日期']
    if col != '截止日期':
        # handle industry rankings
        df['报告年度'] = df[col]
        # rename the original column to '截止日期'
        df.rename(columns={col: '截止日期'}, inplace=True)
    df = df.join(asof_dates.set_index(keys), on=keys)
    df.rename(columns={"证券代码": "sid",
                       "截止日期": "asof_date",
                       "公告日期": "timestamp"}, inplace=True)
    # fix the as-of dates
    _fill_ad_and_ts(df)
    # normalize column names
    df.columns = df.columns.map(_normalized_col_name)
    df.sort_values(['sid', 'asof_date'], inplace=True)
    return df
def index():
    holdings_list = []
    # try:
    tr_by_date_df = pd.read_sql_table('transaction_' + str(current_user.get_id()),
                                      db.engine, index_col='date')
    symbols = pf.get_symbols(tr_by_date_df)
    holdings_ts_list = pf.get_holdings(tr_by_date_df, symbols)
    holdings_df = pf.get_current_holdings(holdings_ts_list)
    cost_basis = pf.get_costbasis(tr_by_date_df)
    # add cost basis and realized gains
    holdings_df = holdings_df.join(cost_basis['basis'])
    holdings_df = holdings_df.join(cost_basis['realized'])
    # print(holdings_df)
    # turn into a list for datatables
    holdings_list = pf.df_to_obj_list(holdings_df, 'ticker')
    # print(holdings_list)
    # except:
    #     holdings_list = []
    return render_template('portfolio/portfolio.html', holdings=holdings_list)
def correlation_analysis(dataset):
    # read
    tb = pd.read_sql_table(dataset, db, index_col='ID')
    X = tb.iloc[:, :-1]
    y = tb.iloc[:, -1]
    # compute correlation
    X.drop(X.columns[X.var() < 1e-5], axis=1, inplace=True)
    # .iloc replaces the removed .ix indexer
    r = np.array([pearsonr(X.iloc[:, i], y) for i in range(X.shape[1])])
    rank = np.abs(r[:, 0]).argsort()[::-1]
    # plot top ones
    N = 9
    top = rank[:N]
    traces = []
    names = []
    for (i, c) in enumerate(X.columns[top]):
        names.append('{}<br>(r={:0.2g} p={:0.2g})'.format(c, r[top[i], 0], r[top[i], 1]))
        traces.append(go.Scatter(x=X[c].values.tolist(), y=y.values.tolist(),
                                 mode='markers', showlegend=False))
    fig = tools.make_subplots(rows=3, cols=3, subplot_titles=names,
                              vertical_spacing=0.1, horizontal_spacing=0.1)
    for (i, p) in enumerate(traces):
        fig.append_trace(p, i // 3 + 1, i % 3 + 1)
    fig['layout'].update(height=700, width=1100)
    fig['layout'].update(margin=go.Margin(l=50, r=50, b=50, t=50, pad=0))
    for a in fig.layout.annotations:
        a['font'].update(size=14)
    return (X.columns[rank], utils.plot_to_div(fig))
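# The ranking idea above (sort features by |Pearson r| against the target)
# can be sanity-checked on synthetic data; a minimal sketch:
import numpy as np
from scipy.stats import pearsonr

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
y = 3 * X[:, 2] + rng.normal(scale=0.1, size=200)  # feature 2 drives y

r = np.array([pearsonr(X[:, i], y) for i in range(X.shape[1])])
rank = np.abs(r[:, 0]).argsort()[::-1]
print(rank[0])  # expected: 2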
def get_params(dataset):
    tb = pd.read_sql_table(dataset, db, index_col='ID')
    X = tb.iloc[:, :-1]
    # remove low-variance columns
    low_var_cols = X.columns[X.var() < 1e-5]
    cols = [c for c in X.columns if c not in low_var_cols]
    return (cols, low_var_cols)
def readFromDB(table, dbConnect):
    engine = create_engine('mysql+mysqldb://' + mysql_user + ':' + mysql_pass +
                           '@' + mysql_host + '/' + mysql_db)
    df = pd.read_sql_table(table, con=engine)
    # clean up SUBJ column
    # df.SUBJ = df.SUBJ.str.strip()
    return df
def crunch_data():
    engine = sq.create_engine("sqlite:///snapshots.sqlite")
    df = pd.read_sql_table("snapshots", engine)
    df = df.set_index(['datetime'])
    today = datetime.date.today()
    from_date = today - datetime.timedelta(weeks=1)
    # to_date = today - datetime.timedelta(weeks=1)
    to_date = today
    dframes = []
    for source, df in df.groupby(['source']):
        ts = df.loc[:, 'percent_women']
        ts = ts[ts > 0.0]
        # resample(how=...) was removed from pandas; call .median() directly
        rs = ts.resample("W").median().to_frame('median')
        rs['week'] = rs.index.weekofyear
        rs = rs[from_date:to_date]
        rs.columns = [source, 'week']
        year = rs.index.year[0]
        week = rs.index.weekofyear[0]
        rs = rs.set_index('week')
        dframes.append(rs)
    df = pd.concat(dframes, axis=1, join='inner')
    return df.T, week, year
def test_writeSimlib():
    pkgDir = os.path.split(oss.__file__)[0]
    dbname = os.path.join(pkgDir, 'example_data', 'enigma_1189_micro.db')
    template_simlib = os.path.join(pkgDir, 'example_data',
                                   'Enigma_1189_micro_main.simlib')
    engineFile = 'sqlite:///' + dbname
    engine = create_engine(engineFile)
    # read the database into a `pd.DataFrame`
    Summary = pd.read_sql_table('Summary', engine)
    EnigmaMain = Summary.query('propID == [364]')
    EnigmaMainSummary = so.SummaryOpsim(EnigmaMain, calculateSNANASimlibs=True,
                                        user='******', host='time')
    simlibfilename = './Enigma_1189_micro_main.simlib'
    EnigmaMainSummary.writeSimlib(simlibfilename)
    with open(template_simlib) as f:
        template_data = f.read()
    with open(simlibfilename) as f:
        new_data = f.read()
    assert new_data == template_data
    if new_data == template_data:
        os.remove(simlibfilename)
def data_package(pkg_tables, pkg_skeleton,
                 out_dir=os.path.join(pudl.settings.PUDL_DIR, "results", "data_pkgs"),
                 testing=False):
    """
    Create a data package of requested tables and their dependencies.

    See Frictionless Data for the tabular data package specification:
    http://frictionlessdata.io/specs/tabular-data-package/

    Args:
        pkg_skeleton (dict): A python dictionary containing several top
            level elements of the data package JSON descriptor specific to
            the data package, including:
            * name: pudl-<datasource> e.g. pudl-eia923, pudl-ferc1
            * title: One line human readable description.
            * description: A paragraph long description.
            * keywords: For search purposes.
        pkg_tables (iterable): The names of database tables to include.
            Each one will be converted into a tabular data resource.
            Dependent tables will also be added to the data package.
        out_dir (path-like): The location of the packaging directory. The
            data package will be created in a subdirectory in this
            directory, according to the name of the package.

    Returns:
        data_pkg (Package): an object representing the data package,
            as defined by the datapackage library.
    """
    # A few paths we are going to need repeatedly:
    # out_dir is the packaging directory -- the place where packages end up
    # pkg_dir is the top level directory of this package:
    pkg_dir = os.path.abspath(os.path.join(out_dir, pkg_skeleton["name"]))
    # data_dir is the data directory within the package directory:
    data_dir = os.path.join(pkg_dir, "data")
    # pkg_json is the datapackage.json that we ultimately output:
    pkg_json = os.path.join(pkg_dir, "datapackage.json")

    # Given the list of target tables, find all dependent tables.
    all_tables = pudl.helpers.get_dependent_tables_from_list(
        pkg_tables, testing=testing)

    # Extract the target tables and save them as CSV files.
    # We have to do this before creating the data resources
    # because the files are necessary in order to calculate
    # the file sizes and hashes.
    for t in all_tables:
        csv_out = os.path.join(data_dir, f"{t}.csv")
        os.makedirs(os.path.dirname(csv_out), exist_ok=True)
        df = pd.read_sql_table(t, pudl.init.connect_db(testing=testing))
        if t in pudl.constants.need_fix_inting:
            df = pudl.helpers.fix_int_na(df, pudl.constants.need_fix_inting[t])
        logger.info(f"Exporting {t} to {csv_out}")
        df.to_csv(csv_out, index=False)

    # Create a tabular data resource for each of the tables.
    resources = []
    for t in all_tables:
        resources.append(
            pudl.output.export.get_tabular_data_resource(t, pkg_dir=pkg_dir))

    data_sources = pudl.helpers.data_sources_from_tables(
        all_tables, testing=testing)
    contributors = set()
    for src in data_sources:
        for c in pudl.constants.contributors_by_source[src]:
            contributors.add(c)

    pkg_descriptor = {
        "name": pkg_skeleton["name"],
        "profile": "tabular-data-package",
        "title": pkg_skeleton["title"],
        "description": pkg_skeleton["description"],
        "keywords": pkg_skeleton["keywords"],
        "homepage": "https://catalyst.coop/pudl/",
        "created": (datetime.datetime.utcnow().
                    replace(microsecond=0).isoformat() + 'Z'),
        "contributors": [pudl.constants.contributors[c] for c in contributors],
        "sources": [pudl.constants.data_sources[src] for src in data_sources],
        "licenses": [pudl.constants.licenses["cc-by-4.0"]],
        "resources": resources,
    }

    # Use that descriptor to instantiate a Package object
    data_pkg = datapackage.Package(pkg_descriptor)

    # Validate the data package descriptor before we go on
    if not data_pkg.valid:
        logger.warning(f"""
            Invalid tabular data package: {data_pkg.descriptor["name"]}
            Errors: {data_pkg.errors}""")

    data_pkg.save(pkg_json)

    # Validate the data within the package using goodtables:
    report = goodtables.validate(pkg_json, row_limit=100_000)
    if not report['valid']:
        logger.warning("Data package data validation failed.")

    return data_pkg
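# Hedged usage sketch: a minimal pkg_skeleton matching the keys the docstring
# above lists; the values and the table name are illustrative.
pkg_skeleton = {
    "name": "pudl-eia923",
    "title": "EIA Form 923 data, via PUDL",
    "description": "Generation, fuel consumption, and fuel cost data "
                   "reported on EIA Form 923, cleaned and normalized.",
    "keywords": ["eia923", "electricity", "fuel"],
}
pkg = data_package(["generation_eia923"], pkg_skeleton)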
def load_data(database_filepath):
    engine = create_engine('sqlite:///' + database_filepath)
    df = pd.read_sql_table('EAdescription', engine)
    return df
def read_sql_table(table, uri, index_col, divisions=None, npartitions=None,
                   limits=None, columns=None, bytes_per_chunk="256 MiB",
                   head_rows=5, schema=None, meta=None, engine_kwargs=None,
                   **kwargs):
    """
    Create dataframe from an SQL table.

    If neither divisions or npartitions is given, the memory footprint of the
    first few rows will be determined, and partitions of size ~256MB will
    be used.

    Parameters
    ----------
    table : string or sqlalchemy expression
        Select columns from here.
    uri : string
        Full sqlalchemy URI for the database connection
    index_col : string
        Column which becomes the index, and defines the partitioning. Should
        be an indexed column in the SQL server, and any orderable type. If the
        type is number or time, then partition boundaries can be inferred from
        npartitions or bytes_per_chunk; otherwise must supply explicit
        ``divisions=``.
        ``index_col`` could be a function to return a value, e.g.,
        ``sql.func.abs(sql.column('value')).label('abs(value)')``.
        ``index_col=sql.func.abs(sql.column("value")).label("abs(value)")``,
        or ``index_col=cast(sql.column("id"),types.BigInteger).label("id")``
        to convert the textfield ``id`` to ``BigInteger``.

        Note ``sql``, ``cast``, ``types`` methods come from the ``sqlalchemy``
        module.

        Labeling columns created by functions or arithmetic operations is
        required.
    divisions: sequence
        Values of the index column to split the table by. If given, this will
        override npartitions and bytes_per_chunk. The divisions are the value
        boundaries of the index column used to define the partitions. For
        example, ``divisions=list('acegikmoqsuwz')`` could be used to
        partition a string column lexicographically into 12 partitions, with
        the implicit assumption that each partition contains similar numbers
        of records.
    npartitions : int
        Number of partitions, if divisions is not given. Will split the values
        of the index column linearly between limits, if given, or the column
        max/min. The index column must be numeric or time for this to work
    limits: 2-tuple or None
        Manually give upper and lower range of values for use with
        npartitions; if None, first fetches max/min from the DB. Upper limit,
        if given, is inclusive.
    columns : list of strings or None
        Which columns to select; if None, gets all; can include sqlalchemy
        functions, e.g.,
        ``sql.func.abs(sql.column('value')).label('abs(value)')``.
        Labeling columns created by functions or arithmetic operations is
        recommended.
    bytes_per_chunk : str, int
        If both divisions and npartitions is None, this is the target size of
        each partition, in bytes
    head_rows : int
        How many rows to load for inferring the data-types, unless passing
        meta
    meta : empty DataFrame or None
        If provided, do not attempt to infer dtypes, but use these, coercing
        all chunks on load
    schema : str or None
        If using a table name, pass this to sqlalchemy to select which DB
        schema to use within the URI connection
    engine_kwargs : dict or None
        Specific db engine parameters for sqlalchemy
    kwargs : dict
        Additional parameters to pass to `pd.read_sql()`

    Returns
    -------
    dask.dataframe

    Examples
    --------
    >>> df = dd.read_sql_table('accounts', 'sqlite:///path/to/bank.db',
    ...                        npartitions=10, index_col='id')  # doctest: +SKIP
    """
    import sqlalchemy as sa
    from sqlalchemy import sql
    from sqlalchemy.sql import elements

    if index_col is None:
        raise ValueError("Must specify index column to partition on")

    engine_kwargs = {} if engine_kwargs is None else engine_kwargs
    engine = sa.create_engine(uri, **engine_kwargs)
    m = sa.MetaData()
    if isinstance(table, str):
        table = sa.Table(table, m, autoload=True, autoload_with=engine,
                         schema=schema)

    index = table.columns[index_col] if isinstance(index_col, str) else index_col
    if not isinstance(index_col, (str, elements.Label)):
        raise ValueError(
            "Use label when passing an SQLAlchemy instance as the index (%s)"
            % index)
    if divisions and npartitions:
        raise TypeError("Must supply either divisions or npartitions, not both")

    columns = ([(table.columns[c] if isinstance(c, str) else c) for c in columns]
               if columns else list(table.columns))
    if index_col not in columns:
        columns.append(table.columns[index_col]
                       if isinstance(index_col, str) else index_col)

    if isinstance(index_col, str):
        kwargs["index_col"] = index_col
    else:
        # function names get pandas auto-named
        kwargs["index_col"] = index_col.name

    if meta is None:
        # derive metadata from first few rows
        q = sql.select(columns).limit(head_rows).select_from(table)
        head = pd.read_sql(q, engine, **kwargs)

        if head.empty:
            # no results at all
            name = table.name
            schema = table.schema
            head = pd.read_sql_table(name, uri, schema=schema,
                                     index_col=index_col)
            return from_pandas(head, npartitions=1)

        bytes_per_row = (head.memory_usage(deep=True, index=True)).sum() / head_rows
        meta = head.iloc[:0]
    else:
        if divisions is None and npartitions is None:
            raise ValueError(
                "Must provide divisions or npartitions when using explicit meta.")

    if divisions is None:
        if limits is None:
            # calculate max and min for given index
            q = sql.select([sql.func.max(index), sql.func.min(index)]).select_from(table)
            minmax = pd.read_sql(q, engine)
            maxi, mini = minmax.iloc[0]
            dtype = minmax.dtypes["max_1"]
        else:
            mini, maxi = limits
            dtype = pd.Series(limits).dtype

        if npartitions is None:
            q = sql.select([sql.func.count(index)]).select_from(table)
            count = pd.read_sql(q, engine)["count_1"][0]
            npartitions = (int(round(count * bytes_per_row /
                                     dask.utils.parse_bytes(bytes_per_chunk)))
                           or 1)
        if dtype.kind == "M":
            divisions = pd.date_range(
                start=mini, end=maxi,
                freq="%iS" % ((maxi - mini).total_seconds() / npartitions),
            ).tolist()
            divisions[0] = mini
            divisions[-1] = maxi
        elif dtype.kind in ["i", "u", "f"]:
            divisions = np.linspace(mini, maxi, npartitions + 1).tolist()
        else:
            raise TypeError(
                'Provided index column is of type "{}". If divisions is not '
                "provided the index column type must be numeric or "
                "datetime.".format(dtype))

    parts = []
    lowers, uppers = divisions[:-1], divisions[1:]
    for i, (lower, upper) in enumerate(zip(lowers, uppers)):
        cond = index <= upper if i == len(lowers) - 1 else index < upper
        q = sql.select(columns).where(sql.and_(index >= lower, cond)).select_from(table)
        parts.append(
            delayed(_read_sql_chunk)(q, uri, meta,
                                     engine_kwargs=engine_kwargs, **kwargs))

    engine.dispose()

    return from_delayed(parts, meta, divisions=divisions)
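# Short usage sketch for the function above, mirroring its docstring example;
# the table and column names are illustrative.
import dask.dataframe as dd

# Partition on a numeric primary key, letting dask infer ~256 MiB chunks.
df = dd.read_sql_table('accounts', 'sqlite:///bank.db', index_col='id')

# Or pin the partition boundaries explicitly for a string index column,
# since string types cannot be split automatically.
df = dd.read_sql_table('accounts', 'sqlite:///bank.db', index_col='name',
                       divisions=list('amz'))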
app = Flask(__name__)


def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('Cleaned_Messages', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
import datetime
import sqlalchemy
import pandas as pd
import sqlite3
import numpy as np

PATH = 'my_file'

# Reads in databases from tasks 1 and 2
engine = sqlalchemy.create_engine('sqlite:///' + PATH)
all_data = pd.read_sql_table("TempAndCO2Log", engine)
all_data.to_csv("tester.csv")
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('messages_disaster', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    # extract data needed for visuals
    # Using the genres as provided, with a Top 10 added in
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
    # remove stop words
    stopwords_ = stopwords.words("english")
    words = [word for word in words if word not in stopwords_]

    # extract root form of words
    words = [WordNetLemmatizer().lemmatize(word, pos='v') for word in words]

    return words


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('DisasterMessages', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data from DisasterResponse
engine = create_engine('sqlite:////home/workspace/models/DisasterResponse.db')
# engine = create_engine('sqlite:///.workspace/models/DisasterResponse.db')
df = pd.read_sql_table('DisasterResponse', engine)

# load model
model = joblib.load("/home/workspace/models/classifier.pickle")
# model = joblib.load("./models/classifier.pickle")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
def index(request):
    dests = Destination.objects.all()
    if request.user.is_authenticated:
        global df
        engine = create_engine(
            'postgresql+psycopg2://postgres:postgres@localhost:5432/telusko')
        df_d = pd.read_sql_table(
            "travello_destination", con=engine, schema='public',
            coerce_float=True,
            columns=['name', 'img', 'desc', 'state', 'city', 'typeofplace'])
        df = pd.DataFrame(df_d)
        geocoder = OpenCageGeocode('ea7fd5e689b149c38ef13cbed352bff5')
        list_lat = []
        list_long = []
        for index, row in df.iterrows():
            name = get_name_from_index(index, df)
            state = get_state_from_index(index, df)
            city = get_city_from_index(index, df)
            query = str(name) + ',' + str(city) + ',' + str(state)
            results = geocoder.geocode(query)
            if len(results) != 0:
                lat = results[0]['geometry']['lat']
                longi = results[0]['geometry']['lng']
            else:
                # fall back to NaN so a failed lookup does not reuse the
                # previous row's coordinates (or raise NameError on row 0)
                lat = longi = np.nan
                print("results is empty for", index, name, state)
            list_lat.append(lat)
            list_long.append(longi)
        df['lat'] = list_lat
        df['lon'] = list_long
        features = ['desc', 'state', 'typeofplace']
        for feature in features:
            df[feature] = df[feature].fillna('')
        df['combined_features'] = df.apply(combine_features, axis=1)
        cv = CountVectorizer()
        count_matrix = cv.fit_transform(df['combined_features'])
        cosine_sim = cosine_similarity(count_matrix)
        custom = CustomPreferences.objects.all()
        # initialize so that these are defined even when no preferences match
        user_prefer = []
        rows_data = []
        final_data = []
        for c in custom:
            if str(c.user) == str(request.user):
                user_prefer = c.preferences
                user_prefer = user_prefer.split(",")
                for up in user_prefer:
                    place_index = get_index_from_title(up, df)
                    similar_places = list(enumerate(cosine_sim[place_index]))
                    sorted_similar_places = sorted(similar_places,
                                                   key=lambda x: x[1],
                                                   reverse=True)
                    i = 0
                    for place in sorted_similar_places:
                        row_data = get_title_from_index(place[0], df)
                        rows_data.append(row_data)
                        i = i + 1
                        if i > 3:
                            break
                for dest in dests:
                    for lists in rows_data:
                        if dest.name in lists:
                            result = TextBlob(dest.desc)
                            polar = result.sentiment.polarity
                            if polar > 0.0:
                                final_data.append(dest)
    else:
        user_prefer = []
        final_data = []
    return render(request, "index.html", {
        'dests': dests,
        'recommendations': final_data
    })
    # Initialize lemmatizer
    lemmatizer = WordNetLemmatizer()

    # Lowercase, eliminate blank spaces and find the root form of the words
    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok, pos='v').lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
try:
    engine = create_engine('sqlite:///../data/DisasterResponse.db')
    df = pd.read_sql_table('messages_categories', con=engine)
except Exception:
    print('Loading data from the database failed; try running this from the app folder')

# load model
model = joblib.load("../models/classifier_model.pkl")


# Function for first plot
def first_plot(df):
    """Create first plot: TOP 10 categories"""
    # Define counts
    categories = df.drop(['id', 'message', 'original', 'genre'],
                         axis=1).sum().sort_values(ascending=False)
from wtforms import TextField, Form
import pandas as pd
from sqlalchemy import create_engine
from global_parameters import *

# assign values from global_parameters.py to local variables
# number_movies_returned - how many of the most similar movies to output when requested for each movie
number_movies_returned = global_number_movies_returned
# database_filepath - name of the sqlalchemy database file where recommendations are stored
database_filepath = global_database_filepath

app = Flask(__name__)

# extract movies data
engine = create_engine('sqlite:///' + database_filepath)
movies_data = pd.read_sql_table('Closest_movies', engine)
engine.dispose()

# get the movie titles
movie_titles = list(movies_data['movie_title'])


# SearchForm class will allow us to have an autocomplete feature
class SearchForm(Form):
    movie_autocomplete = TextField('Movie name', id='movie_autocomplete')


@app.route('/autocomplete', methods=['GET', 'POST'])
def autocomplete():
    '''
# -*- coding: utf-8 -*-
###############################################################################
#######################           Main code            #######################
###############################################################################

# Code 4-12
from sqlalchemy import create_engine
import pandas as pd

## Create the database connection
engine = create_engine('mysql+pymysql://root:[email protected]:'
                       '3306/testdb?charset=utf8')
detail = pd.read_sql_table('meal_order_detail1', con=engine)
print('Index of the order detail table:', detail.index)
print('All values of the order detail table:', '\n', detail.values)
print('Column names of the order detail table:', '\n', detail.columns)
print('Data types of the order detail table:', '\n', detail.dtypes)

# Code 4-13
## Check the number of elements in the DataFrame
print('Number of elements in the order detail table:', detail.size)
print('Number of dimensions of the order detail table:', detail.ndim)  ## dimensions
print('Shape of the order detail table:', detail.shape)  ## shape

# Code 4-14
print('Shape of the order detail table before transposing:', detail.shape)
print('Shape of the order detail table after transposing:', detail.T.shape)
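## Companion sketch (assumes the same engine as above): read_sql_query can
## load only a subset of rows instead of the whole table.
subset = pd.read_sql_query(
    'SELECT * FROM meal_order_detail1 LIMIT 10', con=engine)
print('Shape of the 10-row subset:', subset.shape)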
                    help='obsHistID to generate InstanceCatalog for')
parser.add_argument('--sne_truth_cat', type=str,
                    help='path to lensed SNe truth catalog')
parser.add_argument('--output_dir', type=str,
                    help='output directory for catalog and sed folder')
parser.add_argument('--cat_file_name', type=str,
                    help='filename of instance catalog written')
parser.add_argument('--sed_folder', type=str,
                    help='directory to put SNe SEDs. Will appear in output_dir.')

args = parser.parse_args()

obs_gen = ObservationMetaDataGenerator(database=args.obs_db, driver='sqlite')
sne_truth_db = create_engine('sqlite:///%s' % args.sne_truth_cat, echo=False)
sne_truth_cat = pd.read_sql_table('lensed_sne', sne_truth_db)
lensed_sne_ic = lensedSneCat(sne_truth_cat, args.output_dir,
                             args.cat_file_name, args.sed_folder)
obs_md = get_obs_md(obs_gen, args.obs_id, 2, dither=True)
print(obs_md.mjd.TAI)
for obs_time in np.arange(obs_md.mjd.TAI, obs_md.mjd.TAI + 35.1, 0.25):
    obs_filter = obs_md.bandpass
    print('Writing Instance Catalog for Visit: %i at MJD: %f in Bandpass: %s'
          % (args.obs_id, obs_time, obs_filter))
    add_to_cat_idx, sne_magnorms, sne_sed_names = lensed_sne_ic.calc_sne_mags(obs_time, obs_filter)
    lensed_sne_ic.output_instance_catalog(add_to_cat_idx, sne_magnorms,
                                          sne_sed_names, obs_md,
                                          str('test_cat_%.4f' % obs_time))
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('df', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
def application(environ, start_response):
    session = get_database_session(engine)
    json_c_type = "application/json"
    paths = [p for p in environ['PATH_INFO'].split('/') if p != '']
    headers = []
    origin = environ.get("HTTP_ORIGIN")
    method = environ['REQUEST_METHOD']
    response_factory = ResponseFactory()

    # /
    if len(paths) == 0:
        payload = {'message': "Server works!"}
        return response_factory \
            .create(200) \
            .get_response(payload, start_response)
    else:
        # /players/
        if paths[0] == 'players':
            # GET: /players/
            if method == 'GET':
                # GET: /players/
                if len(paths) == 1:
                    try:
                        # query extract
                        query = parse_qs(environ['QUERY_STRING'])
                        name = query.get('name', [''])[0]
                        club = query.get('club', [''])[0]
                        nationality = query.get('nationality', [''])[0]
                        limit = int(query.get('limit', [10])[0])
                        skip = int(query.get('skip', [0])[0])

                        rows_all = session.query(Player)
                        if name != '':
                            rows_all = rows_all.filter(Player.name.ilike(f'%{name}%'))
                        if club != '':
                            rows_all = rows_all.filter(Player.club.ilike(f'%{club}%'))
                        if nationality != '':
                            rows_all = rows_all.filter(Player.nationality.ilike(f'%{nationality}%'))
                        rows_all = rows_all.order_by(Player.overall.desc(), Player.value.desc())
                        rows = rows_all \
                            .offset(skip) \
                            .limit(limit)

                        # result set
                        records = [dict(id=r.id, name=r.name, position=r.position,
                                        nationality=r.nationality, flag=r.flag,
                                        club=r.club, age=r.age, photo=r.photo,
                                        value=r.value, overall=r.overall)
                                   for r in rows]
                        payload = {
                            'count': rows_all.count(),
                            'results': records
                        }
                        return response_factory \
                            .create(200, origin, method) \
                            .get_response(payload, start_response)
                    except Exception as e:
                        payload = {'message': f"Oops! Something went wrong! => {e}"}
                        return response_factory \
                            .create(500) \
                            .get_response(payload, start_response)
                # GET: /players/...
                else:
                    payload = {'message': f"You are not allowed to make request to `{environ['PATH_INFO']}`!"}
                    return response_factory \
                        .create(403) \
                        .get_response(payload, start_response)
            # POST: /players/
            elif method == 'POST':
                # POST: /players/team/
                if len(paths) == 2 and paths[1] == 'team':
                    length = int(environ.get('CONTENT_LENGTH', '0'))
                    request_body = environ['wsgi.input'].read(length)
                    body = json.loads(request_body)

                    # verify
                    formation = body.get('formation', None)
                    budget = int(body.get('budget', 0))
                    include_free_agents = body.get('include_free_agents', True)
                    if formation and all([p in POSITIONS for p in formation]) and budget >= 1 * 10**6:
                        try:
                            formation = [f.lower() for f in formation]
                            temp = pd.read_sql_table('players', con=engine)[['id', 'position', 'value', 'overall']] \
                                .dropna(subset=['position']) \
                                .query(f'position in {formation}')
                            temp = temp if include_free_agents else temp.query('value > 0')
                            prob, ids = compute_best_lineup(temp, formation, budget)

                            rows = session.query(Player).filter(Player.id.in_(ids))
                            records = {r.position: dict(id=r.id, name=r.name, position=r.position,
                                                        nationality=r.nationality, flag=r.flag,
                                                        club=r.club, age=r.age, photo=r.photo,
                                                        value=r.value, overall=r.overall)
                                       for r in rows}
                            payload = {
                                'total_overall': sum([r.overall for r in rows]),
                                'total_value': sum([r.value for r in rows]),
                                'formation': formation,
                                'results': records
                            }
                            return response_factory \
                                .create(200, origin, method) \
                                .get_response(payload, start_response)
                        except Exception as e:
                            payload = {'message': f"Oops! Something went wrong! => {e}"}
                            return response_factory \
                                .create(500) \
                                .get_response(payload, start_response)
                    else:
                        payload = {
                            'message': "Please select 11 unique and valid positions and set a budget greater than € 1,000,000!",
                            'formation': formation,
                            'budget': budget
                        }
                        return response_factory \
                            .create(400) \
                            .get_response(payload, start_response)
                # POST: /players/...
                else:
                    payload = {'message': f"You are not allowed to make request to `{environ['PATH_INFO']}`!"}
                    return response_factory \
                        .create(403) \
                        .get_response(payload, start_response)
            # HEAD/OPTIONS: /players/
            elif method == 'HEAD' or method == 'OPTIONS':
                return response_factory \
                    .create(200, origin, method) \
                    .get_response(None, start_response)
            # PUT/DELETE/PATCH: /players/
            else:
                payload = {'message': f"You are not allowed to make request with {environ['REQUEST_METHOD']} to `{environ['PATH_INFO']}`!"}
                return response_factory \
                    .create(405) \
                    .get_response(payload, start_response)
        # /assets/
        elif paths[0] == 'assets':
            if method == 'GET':
                status = '200 OK'
                try:
                    headers.append(('Content-Type', 'image/png'))
                    start_response(status, headers)
                    with open(f"./{environ['PATH_INFO']}", "rb") as f:
                        img = f.read()
                    size = stat(f"./{environ['PATH_INFO']}").st_size
                    if size == 0:
                        with open(f"./assets/players/000000.png", "rb") as f:
                            img = f.read()
                    return [img]
                except Exception as e:
                    payload = {'message': f"Image is not available! => {e}"}
                    return response_factory \
                        .create(404) \
                        .get_response(payload, start_response)
            else:
                payload = {'message': f"You are not allowed to make request with {environ['REQUEST_METHOD']} to `{environ['PATH_INFO']}`!"}
                return response_factory \
                    .create(405) \
                    .get_response(payload, start_response)
        # /*
        else:
            payload = {'message': f"You are not allowed to make request to `{environ['PATH_INFO']}`!"}
            return response_factory \
                .create(403) \
                .get_response(payload, start_response)
    'mssql://LAPTOP-TH3PDN0I/Group_8_DB?driver=ODBC+Driver+17+for+SQL+Server')
print("Connected.")

csvfile = '../SPARC_10k_part-ce.csv'
print("File name to load: " + csvfile)

################################################################################
#
# Job Step 10: Load Dimensions
#
################################################################################
jobutils.printStepStart("10")

print("Loading dimensions to memory...")
dim_date_df = pd.read_sql_table('DimDate', con=engine).fillna('')
dim_location_df = pd.read_sql_table('DimLocation', con=engine).fillna('')
dim_demographics_df = pd.read_sql_table('DimDemographics', con=engine).fillna('')
dim_payment_df = pd.read_sql_table('DimPayment', con=engine).fillna('')
dim_clinic_class_df = pd.read_sql_table('DimClinicClass', con=engine).fillna('')
dim_apr_class_df = pd.read_sql_table('DimAPRClassification', con=engine).fillna('')
dim_admission_df = pd.read_sql_table('DimAdmission', con=engine).fillna('')
dim_provider_df = pd.read_sql_table('DimProvider', con=engine).fillna('')
print("Done. Dimensions loaded.")

################################################################################
#
def time_read_sql_table_column(self, dtype):
    read_sql_table(self.table_name, self.con, columns=[dtype])
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('DisasterRis', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for token in tokens:
        clean_token = lemmatizer.lemmatize(token).lower().strip()
        clean_tokens.append(clean_token)

    return clean_tokens


# load data
engine = create_engine('sqlite:///./data/DisasterResponse.db')
df = pd.read_sql_table('Disaster_Response_ETL', engine)

# load model
model = joblib.load("./models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('DataFrame', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
    category_counts = df.iloc[:, 4:].sum().sort_values(ascending=False)[1:6]
def start_flow():
    job_id = admin_api.start_job()

    if not job_id:
        current_app.logger.info('Failed to get job_id')
        job_outcome = 'busy'
    else:
        log_db.log_exec_status(job_id, 'start_flow', 'executing', '')

        file_path_list = os.listdir(CURRENT_SOURCE_FILES_PATH)

        if file_path_list:
            with engine.connect() as connection:
                Base.metadata.create_all(connection)

                # Get previous version of pdp_contacts table, which is used later to classify new records
                pdp_contacts_df = pd.read_sql_table('pdp_contacts', connection)
                pdp_contacts_df = pdp_contacts_df[pdp_contacts_df["archived_date"].isnull()]
                pdp_contacts_df = pdp_contacts_df.drop(columns=['archived_date', 'created_date', '_id', 'matching_id'])

                current_app.logger.info('Loaded {} records from pdp_contacts table'.format(pdp_contacts_df.shape[0]))

                # Clean the input data and normalize/rename columns
                # Populate new records in secondary tables (donations, volunteer shifts)
                # input - existing files in path
                # output - normalized object of all entries, as well as the input json rows for primary sources
                log_db.log_exec_status(job_id, 'clean_and_load', 'executing', '')
                normalized_data, source_json, manual_matches_df = clean_and_load_data.start(
                    connection, pdp_contacts_df, file_path_list)

                # Standardize column data types via postgres (e.g. reading a csv column as int vs. str)
                # (If additional inconsistencies are encountered, may need to enforce the schema of
                # the contacts loader by initializing it from pdp_contacts.)
                normalized_data.to_sql('_temp_pdp_contacts_loader', connection,
                                       index=False, if_exists='replace')
                normalized_data = pd.read_sql_table('_temp_pdp_contacts_loader', connection)

                # Classifies rows into old rows that haven't changed, updated rows and new rows,
                # compared to the existing state of the DB
                log_db.log_exec_status(job_id, 'classify', 'executing', '')
                rows_classified = calssify_new_data.start(pdp_contacts_df, normalized_data)

                # Archives rows that were updated in the current state of the DB (changes their archived_date to now)
                archive_rows.archive(connection, rows_classified["updated"])

                # Match new+updated records against previous version of pdp_contacts database, and
                # write these rows to the database.
                match_data.start(connection, rows_classified, manual_matches_df, job_id)

                # Copy raw input rows to json fields in pdp_contacts,
                # using a temporary table to simplify the update code.
                current_app.logger.info('Saving json of original rows to pdp_contacts')
                source_json.to_sql('_temp_pdp_contacts_loader', connection,
                                   index=False, if_exists='replace')
                # https://www.postgresql.org/docs/8.4/sql-update.html
                connection.execute('''
                    UPDATE pdp_contacts pdp
                    SET json = to_json(temp.json)
                    FROM _temp_pdp_contacts_loader temp
                    WHERE
                        pdp.source_type = temp.source_type AND
                        pdp.source_id = temp.source_id AND
                        pdp.archived_date IS NULL
                ''')

            current_app.logger.info('Finished flow script run')
            job_outcome = 'completed'

        else:  # No files in list
            current_app.logger.info('No files to process')
            job_outcome = 'nothing to do'

    log_db.log_exec_status(job_id, 'flow', 'complete', '')
    return job_outcome
def time_read_sql_table_all(self):
    read_sql_table(self.table_name, self.con)
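# `time_read_sql_table_all` reads like an asv (airspeed velocity) benchmark
# method. A minimal sketch of the benchmark class it presumably lives in is
# shown below; the class name, table size, and setup body are assumptions,
# not the original suite.
import pandas as pd
from pandas import read_sql_table
from sqlalchemy import create_engine


class SQLTableSuite:
    def setup(self):
        # An in-memory SQLite database keeps the benchmark self-contained;
        # asv calls setup() before timing each method.
        self.con = create_engine('sqlite:///:memory:')
        self.table_name = 'bench_table'
        frame = pd.DataFrame({'a': range(10000), 'b': range(10000)})
        frame.to_sql(self.table_name, self.con, index=False)

    def time_read_sql_table_all(self):
        read_sql_table(self.table_name, self.con)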
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()
    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)
    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('messages', engine)

# load model
model = joblib.load("../models/classifier.joblib")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
    num_categories = df[[
def extract_array(fname, param_names, result_name="result",
                  non_existing=np.nan, redux_funs=[np.nanmean],
                  return_param_values=True, conditionals={},
                  db_is_sqlite=False):
    """
    Given a database file (as e.g. produced by FireAndForgetJob), extracts an
    array where each dimension corresponds to a provided parameter, and each
    element is a redux (e.g. mean) of all results (of the given result name)
    for that parameter combination. An optional set of additional conditions
    can be specified. The database file can be csv or sqlite. An empty list of
    parameter names just aggregates the results (sliced by the conditionals).
    A default value for missing parameter combinations can be specified
    (non_existing).
    """
    if db_is_sqlite:
        from sqlalchemy import create_engine
        engine = create_engine('sqlite:///{}'.format(fname))
        df = pd.read_sql_table("FireAndForgetJob", engine)
    else:
        with open(fname) as f:
            df = pd.read_csv(f, error_bad_lines=False, warn_bad_lines=False)

    for k, v in conditionals.items():
        df = df.loc[df[k] == v]
        if k in param_names:
            param_names.remove(k)

    # No parameter names means just return the aggregated values for the
    # (sliced) result.
    if len(param_names) == 0:
        return np.array([redux(df[result_name]) for redux in redux_funs])

    param_values = {
        param_name: np.sort(df[param_name].dropna().unique())
        for param_name in param_names
    }
    sizes = [len(param_values[param_name]) for param_name in param_names]
    results = [np.zeros(tuple(sizes)) + non_existing for _ in redux_funs]

    # Compute the aggregate for each unique combination of all parameters.
    # Note: the group keys must stay in the index (the original passed
    # as_index=False, which breaks the `comb in redux.index` lookup below).
    redux = df.groupby(param_names)[result_name].agg(redux_funs)

    # Since not all parameter combinations might have been computed, iterate
    # over all of them and pull out the computed ones.
    all_combs = itertools.product(
        *[param_values[param_name] for param_name in param_names])
    for index, comb in enumerate(all_combs):
        # One-element tuples should be the value itself.
        if len(comb) == 1:
            comb = comb[0]
        result_ind = np.unravel_index(index, tuple(sizes))
        # Parameter combination was computed.
        if comb in redux.index:
            # Extract the results and put them in the right place.
            for i, redux_fun in enumerate(redux_funs):
                results[i][result_ind] = redux.loc[comb][redux_fun.__name__]

    if not return_param_values:
        return results
    else:
        return results, param_values
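# A hedged usage sketch for extract_array. The csv filename and the column
# names ('lr', 'batch_size', 'loss') are hypothetical placeholders, not taken
# from the original code.
results, param_values = extract_array(
    'runs.csv',
    param_names=['lr', 'batch_size'],
    result_name='loss',
    redux_funs=[np.nanmean, np.nanstd],
)
# results[0] is a (len(lr) x len(batch_size)) array of mean losses;
# results[1] holds the matching standard deviations, with missing parameter
# combinations left at the non_existing default (NaN).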
import joblib
import pandas as pd
from flask import Flask
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sqlalchemy import create_engine

app = Flask(__name__)


def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()
    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)
    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('data/DisasterResponse_table', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
def load_from_mysql(table_name: str):
    """Read a table from the remote MySQL database."""
    LOG.logger_font.info(msg=f"Reading mysql table {table_name}")
    table = pd.read_sql_table(con=RemoteMySQLConfig.engine, table_name=table_name)
    return table
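# Hedged usage sketch for load_from_mysql. The table name 'orders' is a
# hypothetical placeholder, and RemoteMySQLConfig.engine is assumed to be a
# SQLAlchemy engine already configured elsewhere in the project.
orders = load_from_mysql('orders')  # returns a DataFrame of the whole table
print(orders.shape)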
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()
    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)
    return clean_tokens


# load data
engine = create_engine('sqlite:///../data.db')
df = pd.read_sql_table('data', engine)

# load model
model = joblib.load("../classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    percent_request = 100 * df.groupby('genre').sum()['request'] / (
        df.groupby('genre').count()['message'])
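# The index() views above typically go on to build Plotly figures from these
# aggregates and hand them to a template. A minimal sketch under the usual
# disaster-response app layout; the chart choice and the 'master.html'
# template name are assumptions, not the original code.
import json
import plotly
from plotly.graph_objs import Bar
from flask import render_template

graphs = [
    {
        'data': [Bar(x=list(genre_counts.index), y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'xaxis': {'title': 'Genre'},
            'yaxis': {'title': 'Count'},
        },
    }
]
ids = ['graph-{}'.format(i) for i, _ in enumerate(graphs)]
graph_json = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)
# return render_template('master.html', ids=ids, graphJSON=graph_json)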
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('postgresql://*****:*****@localhost:5432/xueshandai')
ip = pd.read_sql_table('ip_to_map_province', engine)


def get_ip_value(ip):
    """Convert a dotted-quad IPv4 string to its integer value."""
    ip2 = ip.split('.')
    ipv = (int(ip2[0]) * 256 ** 3 + int(ip2[1]) * 256 ** 2
           + int(ip2[2]) * 256 + int(ip2[3]))
    return ipv


def findcountryprovince_from_db(ip):
    """Look up the province whose ip_store2 range contains the given address."""
    ipv = get_ip_value(ip)
    sql = ('select * from ip_store2 where ipv1<=' + str(ipv)
           + ' and ipv2>=' + str(ipv))
    data = pd.read_sql_query(sql, engine)
    if data.size > 0:
        return data.loc[0, 'province']
    else:
        return 'unknown'


ip['province'] = ''
for k in ip.index:
    # print(k, ip.loc[k, 'ip'])
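# Building SQL by string concatenation invites injection and quoting bugs. A
# hedged alternative sketch using a parameterized query via SQLAlchemy's
# text(); the table and column names ('ip_store2', 'ipv1', 'ipv2') come from
# the snippet above, everything else is unchanged in spirit.
from sqlalchemy import text


def findcountryprovince_from_db_safe(ip_addr):
    ipv = get_ip_value(ip_addr)
    # Named bind parameter instead of string concatenation.
    query = text('select * from ip_store2 where ipv1 <= :ipv and ipv2 >= :ipv')
    data = pd.read_sql_query(query, engine, params={'ipv': ipv})
    if data.size > 0:
        return data.loc[0, 'province']
    return 'unknown'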
POSTGRES_PORT = 5432
POSTGRES_USERNAME = '******'
POSTGRES_PASSWORD = db_password
POSTGRES_DBNAME = 'us_gun_violence'

# create connection string and database engine
# (SQLAlchemy 1.4+ requires the 'postgresql://' scheme; 'postgres://' is no
# longer accepted)
db_string = f'postgresql://{POSTGRES_USERNAME}:{POSTGRES_PASSWORD}@{POSTGRES_ADDRESS}:{POSTGRES_PORT}/{POSTGRES_DBNAME}'
engine = create_engine(db_string)

# import transformed suspects dataframe
suspects_df = pd.read_sql_table('suspects_ml_transformed', engine)
suspects_df.head()

# import incidents dataframe
incidents_df = pd.read_sql_table('incidents', engine)
incidents_df.head()

# ## Preprocess Data

# combine suspect and incident data
suspects_incidents_df = suspects_df.merge(incidents_df, how='left',
# Lists of the original table names, new table names, and primary keys for the
# database migration.
old_table_names = [
    'armory_item', 'armory_weapon', 'charactercreator_character',
    'charactercreator_character_inventory', 'charactercreator_cleric',
    'charactercreator_fighter', 'charactercreator_mage',
    'charactercreator_necromancer', 'charactercreator_thief'
]
new_table_names = [
    'items', 'weapons', 'characters', 'inventories', 'clerics',
    'fighters', 'mages', 'necromancers', 'thieves'
]
keys = [
    'item_id', 'item_ptr_id', 'character_id', 'id', 'character_ptr_id',
    'character_ptr_id', 'character_ptr_id', 'mage_ptr_id', 'character_ptr_id'
]

for i in range(len(old_table_names)):
    # read_sql_table does the actual read; the original also ran a redundant
    # `sq_curs.execute("select * from ...")` whose result was never used, so
    # it has been dropped here.
    table_df = pd.read_sql_table(old_table_names[i], sq_engine)
    table_df.set_index(keys[i], inplace=True)
    if len(table_df) > 0:
        table_df.to_sql(new_table_names[i], engine, if_exists='replace')

pg_conn.close()
sq_con.close()
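# A hedged variant of the migration loop using zip(), which avoids index
# bookkeeping across the three parallel lists; the behavior is otherwise the
# same as the loop above.
for old_name, new_name, key in zip(old_table_names, new_table_names, keys):
    frame = pd.read_sql_table(old_name, sq_engine)
    frame.set_index(key, inplace=True)
    if len(frame) > 0:
        frame.to_sql(new_name, engine, if_exists='replace')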