Code example #1
 def _load_chat(self):
   """Return df of iMessage chats."""
   # Create sql engine for pandas connection.
   im_engine = create_engine('sqlite:////Users/{u}/Library/Messages/chat.db'.format(u=self.username))
   # Load tables into DataFrames.
   handle_cols = ['ROWID', 'id']
   df_handle = pd.read_sql_table(table_name='handle', con=im_engine, columns=handle_cols)
   df_chat_handle_join = pd.read_sql_table(table_name='chat_handle_join', con=im_engine)
   chat_cols = ['ROWID', 'display_name', 'chat_identifier']
   df_chat = pd.read_sql_table(table_name='chat', con=im_engine, columns=chat_cols)
   df_chat_msg_join = pd.read_sql_table(table_name='chat_message_join', con=im_engine)
   msg_cols = ['ROWID', 'text', 'date', 'is_emote', 'is_from_me', 'handle_id']
   df_msg = pd.read_sql_table(table_name='message', con=im_engine, columns=msg_cols)
   # Join iMessage tables into single DataFrame. 
   ## Remove unnecessary fields before each join.
   df = df_handle.merge(df_chat_handle_join, left_on='ROWID', right_on='handle_id', how='left')
   df.drop(labels=['ROWID'], axis=1, inplace=True)
   df = df.merge(df_chat, left_on='chat_id', right_on='ROWID', how='left')
   df.drop(labels=['ROWID'], axis=1, inplace=True)
   df = df.merge(df_chat_msg_join, on='chat_id', how='left')
   df.drop(labels=['chat_id'], axis=1, inplace=True)
   df = df.merge(df_msg, left_on=['message_id', 'handle_id'], right_on=['ROWID', 'handle_id'], how='left')
   df.drop(labels=['ROWID', 'message_id'], axis=1, inplace=True)
   # Parse timestamp.
   ## Use 978307200 to convert Apple NSDate to Unix Epoch time. 
   ns_conversion = 978307200
   _date_func = lambda x: dt.fromtimestamp(x + ns_conversion) if not np.isnan(x) else dt.now()
   df.date = df.date.apply(_date_func)
   return df
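
Note: the timestamp handling above relies on the 978,307,200-second offset between the Unix epoch (1970-01-01) and Apple's reference date (2001-01-01). A minimal standalone sketch of that conversion, with an illustrative input value rather than a real chat.db timestamp:

from datetime import datetime

# Seconds between 1970-01-01 and 2001-01-01 (UTC), as used in _load_chat above.
APPLE_EPOCH_OFFSET = 978307200

def apple_timestamp_to_datetime(seconds_since_2001):
    """Convert an Apple/Core Data timestamp (seconds since 2001-01-01) to a datetime."""
    return datetime.fromtimestamp(seconds_since_2001 + APPLE_EPOCH_OFFSET)

# Illustrative value only: 500,000,000 seconds after 2001-01-01.
print(apple_timestamp_to_datetime(500_000_000))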
Code example #2
def rank(dataset, force_overwrite = False):
    # name of the result table
    res_tb = dataset + '_result'
    # read even when cached. column names are extracted from X
    tb = pd.read_sql_table(dataset, db, index_col = 'ID')
    X = tb.iloc[:, :-1]; y = tb.iloc[:, -1]
    # check if it is cached
    if res_tb in db.table_names() and not force_overwrite:
        # yes, get it
        res = pd.read_sql_table(res_tb, db, index_col = 'index')
    else:
        # no, compute it
        # remove low var columns
        low_var_cols = X.columns[X.var() < 1e-5]
        X.drop(low_var_cols, axis = 1, inplace = True)
        # rank
        (rank1, R2) = rfe_with_grid_search(X.values, y,
                                          RandomForestRegressor(n_jobs = -1),
                                          [{'n_estimators': [5, 10, 30],
                                            'max_features': [1.0]}])
        (rank2, scores) = lassocv_n_random_lasso(X, y)
        res = pd.DataFrame(np.array([X.columns[rank1], R2,
                                     X.columns[rank2], scores]).T,
                           columns = ['rfe_random_forest',
                                      'R2',
                                      'randomized_lasso',
                                      'scores'])
        
        res.to_sql(res_tb, db, if_exists = 'replace')
    return (res['rfe_random_forest'], res['R2'],
            res['randomized_lasso'], res['scores'])
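
Note: example #2 follows a compute-once, cache-in-SQL pattern: look for a result table, reload it with read_sql_table if present, otherwise compute it and persist it with to_sql. A minimal sketch of that pattern against an in-memory SQLite engine (the table name and toy data are made up for illustration):

import pandas as pd
from sqlalchemy import create_engine, inspect

engine = create_engine("sqlite:///:memory:")
res_tb = "demo_result"  # hypothetical cache-table name

if res_tb in inspect(engine).get_table_names():
    # Cached: just reload the stored result.
    res = pd.read_sql_table(res_tb, engine, index_col="index")
else:
    # Not cached: compute (here, a toy DataFrame) and persist for next time.
    res = pd.DataFrame({"feature": ["a", "b"], "score": [0.9, 0.1]})
    res.to_sql(res_tb, engine, if_exists="replace")
print(res)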
Code example #3
File: views.py Project: gomar/woney
def get_balance():
    accounts = pd.read_sql_table('account', db.engine)
    transactions = pd.read_sql_table('transaction', db.engine, columns=['account', 'amount'])
    scheduled_transactions = pd.read_sql_table('scheduled_transaction', 
                                               db.engine) 

    transactions = transactions.rename(columns={'account': 'name'})
    transactions = transactions.groupby('name', as_index=False).sum()
    accounts['amount'] = accounts['reconciled_balance']
    for name in transactions.name:
        accounts.loc[accounts['name'] == name, 'amount'] += \
            transactions.loc[transactions.name == name, 'amount'].iloc[-1]
    # taking scheduled transactions into account
    accounts['end_of_month_amount'] = accounts['amount']
    for idx, operation in scheduled_transactions.iterrows():
        i = 0
        today = datetime.datetime.now()
        last_day_of_month = today + relativedelta(day=1, months=+1, days=-1)
        while operation.next_occurence \
            + relativedelta(**{operation.every_type: i * operation.every_nb}) \
            <= last_day_of_month:
            i += 1
        accounts.loc[accounts['name'] == operation.account, 'end_of_month_amount'] += \
            operation.amount * i
    return accounts
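
Note: the end-of-month computation in get_balance (relativedelta(day=1, months=+1, days=-1)) is a compact idiom: snap to the first of the month, advance one month, then step back a day. A self-contained sketch:

import datetime
from dateutil.relativedelta import relativedelta

today = datetime.datetime.now()
# day=1 snaps to the 1st, months=+1 moves to the next month, days=-1 steps
# back one day -- i.e. the last day of the current month.
last_day_of_month = today + relativedelta(day=1, months=+1, days=-1)
print(last_day_of_month)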
Code example #4
File: dataloader.py Project: neuralyzer/kerasvis
 def __init__(self, path=None):
     if path is None:
         path = "sqlite:///" + os.path.join(os.environ["HOME"], "tmp", "keras_logs.db")
     db_path = path.replace("sqlite:///", "")
     try:
         self.logs = pd.read_sql_table("log", path)
         self.runs = pd.read_sql_table("run", path).rename(columns={"id": "runid"}).sort_values("runid", ascending=False)
         self.df = self.logs.merge(self.runs)
     except ValueError:
         self.runs = pd.DataFrame({"runid":[], "comment":[], "user":[]})
Code example #5
def getAllData(poi_pca,con):
    order_info = pd.read_sql_table('compressed_districts',con=con)
    weather_info = pd.read_sql_table('weather_info',con=con)
    cluster_map = pd.read_sql_table('district_info',con=con)
    traffic_info = pd.read_sql_table('traffic_info',con=con)
    poi_info = pd.read_sql_table('poi_info',con=con)
    fixed_weather = dw.fixWeatherData(weather_info)
    fixed_traffic = dw.fixTrafficData(traffic_info,cluster_map)
    fixed_poi,expected_var = cp.compressPoiData(poi_info.fillna(0),poi_pca)
    return order_info,fixed_poi,fixed_weather,fixed_traffic
Code example #6
File: pdsql.py Project: allisnone/tradeStrategy
 def get_table_df(self, table, columns=None):
     """
     :param table: string type, db_name.table_name
     :param columns: list type with string values, like: ['acc_name', 'initial']
     :return: DataFrame type
     """
     if columns:
         # Only read the requested columns.
         return pd.read_sql_table(table, self.engine, columns=columns)
     else:
         # No column filter given: read the whole table.
         return pd.read_sql_table(table, self.engine)
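
Note: the columns argument of pd.read_sql_table restricts the read to the listed columns, which is what the corrected branch above passes through as a keyword argument. A standalone sketch with placeholder table and column names:

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine("sqlite:///example.db")  # placeholder database

# Read only two columns; omit `columns` to load the whole table.
df = pd.read_sql_table("accounts", engine, columns=["acc_name", "initial"])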
Code example #7
File: view.py Project: ahtanwang/p_stock
def ShowGS(com1, com2, com3):
	df2 = Codes[Codes['code'].isin([com1])]
	print df2

	if com2 == 'syl30':
		tname = 'b'+com1
		if me.IsTableExist(tname, G_DBengine) == False:
			print 'No table ....%s'%tname
			return
		df = pd.read_sql_table(tname,G_DBengine)
		if df.index.size > 250:
			df1 = df.drop(range(df.index.size - 250))
		else:
			df1 = df
		me.PinghuaDF(df1, md.BI_syl30+1, 5)
		plt.title(com1 + '  ' +  com2 + '  '  + str(max(df1['date'])))
		plt.fill_between(df1.index, df1['syl30'], 0, where=df1['syl30']>0,facecolor='red')
		plt.fill_between(df1.index, df1['syl30'], 0, where=df1['syl30']<=0,facecolor='green')
		
	elif com2 == 'syl250':
		tname = 'b'+com1
		if me.IsTableExist(tname, G_DBengine) == False:
			print 'No table ....%s'%tname
			return	
		df = pd.read_sql_table(tname,G_DBengine)
		me.PinghuaDF(df, md.BI_syl250+1, 30)
		plt.fill_between(df.index, df['syl250'], 0, where=df['syl250']>0,facecolor='red')
		plt.fill_between(df.index, df['syl250'], 0, where=df['syl250']<=0,facecolor='green')
		plt.title(com1 + '  ' +  com2 + '  '  + str(max(df['date'])))
		View_10X(plt,df, '123')
		
	elif com2 == 'hb':
		tname = 'f'+com1	
		if me.IsTableExist(tname, G_DBengine) == False:
			print 'No table ....%s'%tname
			return	
		df = pd.read_sql_table('f'+com1,G_DBengine)
		df[1:df.index.size][['sjsrhb','sjlrhb']].plot(kind='bar',color={'red','green'})
		df[1:df.index.size]['nhgdqyl'].plot(color='blue', secondary_y=True, linewidth = LW)
		plt.title(com1 + '  ' +  com2 + '  ' + str(df.loc[df.index.size-1,'year']) + '  ' + str(df.loc[df.index.size-1,'season'])  )

	elif com2 == 'sr':
		tname = 'f'+com1	
		if me.IsTableExist(tname, G_DBengine) == False:
			print 'No table ....%s'%tname
			return	
		df = pd.read_sql_table('f'+com1,G_DBengine)
		df[1:df.index.size]['sjsr'].plot(kind='bar',color='green')
		df[1:df.index.size]['sjlr'].plot(color='red', secondary_y=True, linewidth=LW)				
		plt.title(com1 + '  ' +  com2 + '  ' + str(df.loc[df.index.size-1,'year']) + '  ' + str(df.loc[df.index.size-1,'season'])  )		
		
	else:
		print '[Error] input error ...'
		return
		
	plt.show()
	plt.close()
Code example #8
File: compare.py Project: joepax/openpharma
def read_data():
    """read data from mysql"""

    # open connection and read table
    my_db = sa.engine.url.URL(drivername = 'mysql',
                              database = 'openpharma_db',
                              query = {'read_default_file' : '~/.my.cnf'}
                              )
    engine = sa.create_engine(name_or_url = my_db)
    df_class = pd.read_sql_table(table_name = 'classifier_tb', con = engine)
    df_trial = pd.read_sql_table(table_name = 'trials_tb', con = engine)

    return df_class, df_trial
Code example #9
    def fromOpSimDB(cls, dbname, subset='combined'):
        """
        Class Method to instantiate this from an OpSim sqlite
        database output

        Parameters
        ----------
        dbname : str
            path to the OpSim sqlite database, or an sqlite URI
        subset : str
            name of the subset of the Summary table to load; defaults
            to 'combined'
        """
        allowed_subsets = cls.get_allowed_subsets()
        subset = subset.lower()
        if subset not in allowed_subsets:
            raise NotImplementedError('subset {} not implemented'.format(subset))
        if not dbname.startswith('sqlite'):
            dbname = 'sqlite:///' + dbname
        print(' reading from database {}'.format(dbname))
        engine = create_engine(dbname, echo=False)
        # Read the proposal table to find out which propID corresponds to
        # which proposal.
        proposals = pd.read_sql_table('Proposal', con=engine)
        propDict = cls.get_propIDDict(proposals)

        # Do the actual sql queries or table reads
        if subset in ['_all', 'unique_all']:
            # In this case read everything (ie. table read)
            summary = pd.read_sql_table('Summary', con=engine)
            # _all will be used only to write out other serialized versions
            # of OpSim. Do not drop duplicates, so that different subsets can
            # be constructed from the same hdf file
            if subset == 'unique_all':
                summary.drop_duplicates(subset='obsHistID', inplace=True)
            summary.set_index('obsHistID', inplace=True)
            return cls(propIDDict=propDict, summary=summary,
                       proposalTable=proposals)
        else:
            sql_query = 'SELECT * FROM Summary WHERE PROPID'
            if subset == 'ddf':
                sql_query += ' == {0}'.format(propDict['ddf'])
            if subset == 'wfd':
                sql_query += ' == {0}'.format(propDict['wfd'])
            if subset == 'combined':
                sql_query += ' in ({0}, {1})'.format(propDict['wfd'],
                                                     propDict['ddf'])
            # Read the summary table
            summary = pd.read_sql_query(sql_query, con=engine)
            summary.drop_duplicates(subset='obsHistID', inplace=True)
            summary.set_index('obsHistID', inplace=True)
            return cls(propIDDict=propDict, summary=summary,
                       proposalTable=proposals)
Code example #10
File: sum.py Project: joepax/openpharma
def read_data():
    """read data from mysql"""

    print "Reading data..." 

    # open connection and read table
    my_db = sa.engine.url.URL(drivername = 'mysql',
                              database = 'openpharma_db',
                              query = {'read_default_file' : '~/.my.cnf'}
                              )
    engine = sa.create_engine(name_or_url = my_db)
    df_compare = pd.read_sql_table(table_name = 'compare_tb', con = engine)
    df_packages = pd.read_sql_table(table_name = 'packages_tb', con = engine)

    return df_compare, df_packages
Code example #11
def unlock_form(project_name, arm_name, event_descrip, form_name, engine, subject_id = None):
    """
    Unlock a given form by removing its locking records from the table

    :param project_name: str
    :param arm_name: str
    :param event_descrip: str
    :param form_name: str
    :param engine: `sqlalchemy.Engine`
    :param subject_id: str
    :return: True if any locked records were removed, otherwise False
    """
    # get ids needed for unlocking
    project_id = get_project_id(project_name, engine)
    arm_id = get_arm_id(arm_name, project_id, engine)
    event_id = get_event_id(event_descrip, arm_id, engine)
    # get a list of all the locked records and filter for records to remove
    locked_records = pd.read_sql_table('redcap_locking_data', engine)
    locked_forms = locked_records[(locked_records.project_id == project_id) &
                                  (locked_records.event_id == event_id) &
                                  (locked_records.form_name == form_name)]
    if subject_id : 
        locked_forms = locked_forms[(locked_forms.record == subject_id)]

    # generate the list of ids to drop and remove from db table
    global locked_list
    locked_list = ', '.join([str(i) for i in locked_forms.ld_id.values.tolist()])
    if locked_list:
        sql = 'DELETE FROM redcap_locking_data ' \
              'WHERE redcap_locking_data.ld_id IN ({0});'.format(locked_list)
        execute(sql, engine)
        return True
    else :
        return False
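
Note: a hedged usage sketch of unlock_form, with every name and the connection URL standing in as placeholders:

from sqlalchemy import create_engine

engine = create_engine("mysql+pymysql://user:password@localhost/redcap")  # placeholder URL

# Remove the lock on one subject's form; returns True if any rows were deleted.
was_unlocked = unlock_form(
    project_name="example_project",   # placeholder
    arm_name="Arm 1",                 # placeholder
    event_descrip="Baseline",         # placeholder
    form_name="demographics",         # placeholder
    engine=engine,
    subject_id="SUBJ-001",            # placeholder
)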
Code example #12
File: plot_time_mem.py Project: Shenglai/apipe
def plot_time_mem_picard_val(pre_post_fastqc_readcount_match_df,data_engine,logger):
    time_mem_picard_validatesamfile_df=pd.read_sql_table('time_mem_picard_validatesamfile',data_engine)

    df=pd.merge(pre_post_fastqc_readcount_match_df,time_mem_picard_validatesamfile_df,how='inner',on='uuid')
    df_pre=df
    df_pos=df[df['bam_path'].str.contains('realn')]
    ndf=df[['pre_count','wall_clock']]
    ndf = ndf.apply(pd.to_numeric, errors='coerce')  # convert_objects() was removed from pandas
    ndf['wall_clock']=ndf['wall_clock']/3600
    ndf['pre_count']=ndf['pre_count']/1000000
    ax=ndf.plot(x='pre_count',y='wall_clock',kind='scatter')
    
    yrange_max=math.ceil(max(ndf['wall_clock']))
    xmod=ndf['pre_count']/500
    xmod_ceil=math.ceil(max(xmod))
    xrange_max=xmod_ceil*500
    ax.set_title('picard ValidateSamFile')
    ax.set_xlim([0,xrange_max])
    ax.set_ylim([0,yrange_max])
    ax.set_xlabel('readcount (millions)')
    ax.set_ylabel('run time (hr)')
    fig=ax.get_figure()
    
    fig.savefig('picard_validate_readcount_preharmonize.png',dpi=600)
    fig.savefig('picard_validate_readcount_postharmonize.png',dpi=600)
    fig.savefig('picard_validate_filesize_preharmonize.png',dpi=600)
    fig.savefig('picard_validate_filesize_postharmonize.png',dpi=600)
Code example #13
File: data_to_sql.py Project: hbwzhsh/stock
def download_all_stock_history_k_line():
    print "download all stock k-line start"

    try:
        if cm.DB_WAY == "csv":
            df = pd.DataFrame.from_csv(cm.DownloadDir + cm.TABLE_STOCKS_BASIC + ".csv")
            # se = df.loc[int(code)]
            # se = df.ix[code]
            pool = ThreadPool(processes=20)
            pool.map(download_stock_kline, df.index)
            pool.close()
            pool.join()
        elif cm.DB_WAY == "redis":
            codes = r.smembers(cm.INDEX_STOCK_BASIC)
            # codes = r.lrange(cm.INDEX_STOCK_BASIC, 0, -1)
            pool = ThreadPool(processes=20)
            pool.map(download_stock_kline_to_redis, codes)
            pool.close()
            pool.join()
        elif cm.DB_WAY == "sqlite":
            df = pd.read_sql_table(cm.INDEX_STOCK_BASIC, engine)
            codes = df[cm.KEY_CODE].get_values()
            # codes = r.lrange(cm.INDEX_STOCK_BASIC, 0, -1)
            pool = ThreadPool(processes=2)
            pool.map(download_stock_kline_to_sqlite, codes)
            pool.close()
            pool.join()
    except Exception as e:
        print str(e)
    print "download all stock k-line finish"
Code example #14
    def break_low(self, date):
        '''
        Select stocks that have hit a new one-year low.
        :param date: a date string for a given day, e.g. '2017-11-11'
        :return:
        '''
        #cmd = 'select * from `{}`'.format(date)
        df = pd.read_sql_table(date, daily_engine,index_col='index')
        # **** one of the index columns here needs to be dropped
        low_db= get_mysql_conn('db_selection')
        low_cursor = low_db.cursor()
        for i in range(len(df)):
            code = df.loc[i]['code']
            cur_low = df.loc[i]['low']

            mins_date,mins = self.get_lowest(code, '2017',date)
            if not mins_date:
                continue
            if mins and float(cur_low)<=float(mins) and float(cur_low) !=0.0:
                print code,
                print df.loc[i]['name']
                print 'year mins {} at {}'.format(mins,mins_date)
                print 'current min ',cur_low
                create_cmd = 'create table if not exists break_low' \
                             '(`index` int primary key auto_increment,datetime datetime,code text,name text,low_price float,last_price float, last_price_date datetime);'
                low_cursor.execute(create_cmd)
                insert_cmd = 'insert into break_low (datetime,code,name,low_price,last_price,last_price_date) values (%s,%s,%s,%s,%s,%s);'
                insert_data = (date,code,df.loc[i]['name'],cur_low,mins,mins_date)
                low_cursor.execute(insert_cmd,insert_data)
                low_db.commit()
Code example #15
File: James_Gordon.py Project: guojing1217/GBRT
def InitializeMonthlyTable():
    disk_engine = create_engine('mysql://*****:*****@quantico.chgivxnnhpn3.us-west-2.rds.amazonaws.com/Quantico')
    df_data = pd.read_sql_table('data',disk_engine)
    allclients = sorted(list(pd.unique(df_data.client.ravel())))
    
    client_job_status_dict = {}
    for client in allclients:
        client_job_status_dict[client] = {'success':0,'failure':0,'partial':0}
    
    start = dt.datetime(2016,2,1,18,0,0)
    days = calendar.monthrange(2016,2)[1]

    #start = dt.datetime(2016,1,18,16,0,0)
    #days = 4
    
    global table
    #un_sorted_table = {}
    for day in range(1,days+1):
        table[start] = copy.deepcopy(client_job_status_dict)
        for client in allclients:
            s=time.mktime(start.timetuple())
            t=start + dt.timedelta(days=1)
            e=time.mktime(t.timetuple())
            if len(df_data.query('started > {0} and started < {1} and client == "{2}" and status == {3}'.format(s,e,client,0))) == 0:
               table[start][client]['success'] = 0
               table[start][client]['failure'] = 0
               table[start][client]['partial'] = 0
            else:
                table[start][client]['success'] = len(df_data[ (df_data['started']>s) & (df_data['started']<e) & (df_data['client']==client) & (df_data['status']==0) ].index)
                table[start][client]['failure'] = len(df_data[ (df_data['started']>s) & (df_data['started']<e) & (df_data['client']==client) & (df_data['status']>1) ].index)
                table[start][client]['partial'] = len(df_data[ (df_data['started']>s) & (df_data['started']<e) & (df_data['client']==client) & (df_data['status']==1) ].index)
            
        start += dt.timedelta(days=1)
Code example #16
File: catalog.py Project: crawfordsm/astro-toyz
def load_catalog(file_info, load_log=False):
    """
    Load a catalog file. For now this exclusively uses an alchemy connection
    to a DB but in the future this will have methods for various file types,
    or at the very least an export function.
    
    The biggest complication is that in addition to the table information there
    is also metadata (which pandas currently does not support) and a log of
    changes to a catalog.
    """
    cid = file_info['file_settings']['table']
    connect_str = file_info['filepath']
    engine = create_engine(connect_str) # connects to the db
    Base.metadata.bind = engine # binds the metadata to the engine
    if cid not in Base.metadata.tables.keys():
        raise astrotoyz.core.AstroToyzError("Catalog not found in database")
    dataframe = pandas.read_sql_table(cid, engine)
    DBSession = sessionmaker(bind=engine)
    session = DBSession()
    meta = session.query(CatalogMeta).filter(CatalogMeta.cid==cid).first()
    if meta is None:
        raise astrotoyz.core.AstroToyzError("Could not find catalog meta data")
    settings = json.loads(meta.settings)
    if load_log:
        log = pandas.read_sql_query(
            "SELECT * FROM log WHERE cid='{0}'".format(cid),
            engine)
    else:
        log = None
    catalog = Catalog(cid, file_info, name=meta.name, log=log, data=dataframe)
    return catalog
Code example #17
File: fin.py Project: ahtanwang/p_stock
def fin_read_hy(hy):
	is_first = True
	for i in range(Codes.index.size):
		code = Codes.loc[i, 'code']
		t_name = 'f'+code
		if me.IsTableExist(t_name,G_DBengine) == False:
			continue
		if (is_first):
			fin = pd.read_sql_table(t_name,G_DBengine)
			is_first = False
		else:
			df = pd.read_sql_table(t_name,G_DBengine)
			fin = fin.append(df)
		print '...fin_read_hy:' + hy + '.......[%d of %d]'%(i,Codes.index.size) 
	
	return fin	
Code example #18
File: fin.py Project: ahtanwang/p_stock
def fin_com_hy(hycode):
	global Fin
	hy_fin = pd.read_sql_table('f600036',G_DBengine)
	del hy_fin['level_0']
	hy_fin['code'] = hycode
	hy_fin['name'] = hycode
	hy_fin['jzc'] = 0.0
	hy_fin['sjsr'] = 0.0
	hy_fin['sjlr'] = 0.0
	hy_fin['sjsrhb'] = 0.0
	hy_fin['sjlrhb'] = 0.0
	i = 0
	for y in range(2006,2017):
		for s in range(1,5):
			df1 = Fin[Fin.year == y]
			df2 = df1[df1.season == s]
			if (df2.index.size == 0):
				i = i + 1
				continue
			d_sum = df2.sum()
			if i < hy_fin.index.size:
				hy_fin.iat[i,G_jzc ] = d_sum.jzc
				hy_fin.iat[i,G_sjsr ] = d_sum.sjsr
				hy_fin.iat[i,G_sjlr ] = d_sum.sjlr
				i= i+1
				
	
	for i in range(3, hy_fin.index.size):
		hy_fin.iat[i,G_sjsrhb ] = hy_fin.iat[i,G_sjsr ] /(hy_fin.iat[i,G_jzc] + hy_fin.iat[i-1,G_jzc] + hy_fin.iat[i-2,G_jzc] + hy_fin.iat[i-3,G_jzc]) * 400
		hy_fin.iat[i,G_sjlrhb ] = hy_fin.iat[i,G_sjlr ] /(hy_fin.iat[i,G_jzc] + hy_fin.iat[i-1,G_jzc] + hy_fin.iat[i-2,G_jzc] + hy_fin.iat[i-3,G_jzc]) * 400
	
	return hy_fin
Code example #19
File: FXImport.py Project: dgoodburn/Project
def uploadFX(rates):

    table = rates[0]
    today = rates[1]

    try:
        df = pd.read_sql_table("fxrates", engine, parse_dates="FXDate")
    except:
        df = pd.read_csv("Common/FX rates.csv", parse_dates=["FXDate"])

    max_date = df["FXDate"].max().date()
    if max_date < last_date:
        df_newdates = update_dates(max_date)
        df = df.append(df_newdates)
        df = df.sort(["FXDate"], ascending=False)

    df.loc[df.loc[:, "FXDate"] == table[0][0], "Rate"] = table[0][1]

    for i in range(len(table)):

        if i == 0 and today == datetime.date.today():
            df.loc[df.loc[:, "FXDate"] >= table[i][0], "Rate"] = table[i][1]
        else:
            df.loc[df.loc[:, "FXDate"] == table[i][0], "Rate"] = table[i][1]

    df = df.iloc[:, 1:]

    df.to_csv("Common/FX rates.csv", index=False)
Code example #20
File: session.py Project: sibis-platform/sibis
    def get_mysql_table_records(self,table_name,project_name, arm_name, event_descrip, name_of_form=None, subject_id=None):
        """
        Get a dataframe of forms for a specific event

        :param project_name: str
        :param arm_name: str
        :param event_descrip: str
        :return: pandas.DataFrame`
        """

        project_id = self.get_mysql_project_id(project_name)
        if not project_id : 
            return pd.DataFrame()  

        arm_id = self.get_mysql_arm_id(arm_name, project_id)
        event_id = self.get_mysql_event_id(event_descrip, arm_id)
        table_records = pd.read_sql_table(table_name, self.api['redcap_mysql_db'])
        table_forms = table_records[(table_records.project_id == project_id) & (table_records.event_id == event_id)]
        if name_of_form :
            table_forms = table_forms[table_forms.form_name == name_of_form]

        if subject_id:
            table_forms = table_forms[table_forms.record == subject_id]

        return table_forms
Code example #21
File: data_download.py Project: ongbe/stock
def download_all_stock_history_k_line():
    print 'download all stock k-line start'
    
    try:
        if DB_WAY == 'csv':
            df = pd.DataFrame.from_csv(DownloadDir + INDEX_STOCK_BASIC + '.csv')
            #se = df.loc[int(code)]
            #se = df.ix[code]
            pool = ThreadPool(processes=20)
            pool.map(download_stock_kline_csv, df.index)
            pool.close()
            pool.join()
        elif DB_WAY == 'redis':
            codes = r.smembers(INDEX_STOCK_BASIC)
            #codes = r.lrange(INDEX_STOCK_BASIC, 0, -1)
            pool = ThreadPool(processes=20)
            pool.map(download_stock_kline_to_redis, codes)
            pool.close()
            pool.join()     
        elif DB_WAY == 'mysql':
            df = pd.read_sql_table(INDEX_STOCK_BASIC, engine)
            codes = df[KEY_CODE].get_values() 
            #codes = r.lrange(INDEX_STOCK_BASIC, 0, -1)
            pool = ThreadPool(processes=2)
            pool.map(download_stock_kline_to_sql, codes)
            pool.close()
            pool.join()

    except Exception as e:
        print str(e)
    print 'download all stock k-line finish'
Code example #22
File: k_line.py Project: Rockyzsu/stock
    def _xiayingxian(self, row, ratio):
        '''
        Lower-shadow (xiayingxian) logic. ratio is the length ratio of the
        lower shadow: the larger the number, the longer the lower shadow.
        row: a pandas Series
        '''
        open_p = float(row['open'])
        # print(open_p)
        closed = float(row['close'])
        # print(closed)
        low = float(row['low'])
        # print(low)
        high = float(row['high'])
        p = min(closed,open_p)
        try:
            diff = (p - low) * 1.00 / (high - low)
            diff=round(diff,3)
        except ZeroDivisionError:
            diff = 0
        if diff > ratio:
                xiayinxian_engine = get_engine('db_selection')
                date,code,name,ocupy_ration ,standards = row['datetime'],row['code'],row['name'],diff,ratio
                df = pd.DataFrame(
                    {'datetime': [date], 'code': [code], 'name': [name], 'ocupy_ration': [ocupy_ration],
                     'standards': [standards]})
                try:
                    df1=pd.read_sql_table('xiayingxian',xiayinxian_engine,index_col='index')
                    df = pd.concat([df1, df])
                except Exception as e:
                    print(e)
                    #return None

                df = df.reset_index(drop=True)
                df.to_sql('xiayingxian',xiayinxian_engine,if_exists='replace')
                return row
Code example #23
File: view.py Project: ahtanwang/p_stock
def ShowMoney(com1, com2, com3):
	if True:
		tname = 'money'
		if me.IsTableExist(tname, G_DBengine) == False:
			print 'No table ....%s'%tname
			return
		df = pd.read_sql_table(tname,G_DBengine)
		df = df.sort_values('num',  ascending = False)	
		me.PinghuaDF(df, 18, 5)	
		me.PinghuaDF(df, 19, 5)	
		me.PinghuaDF(df, 20, 5)	
		df[['fm2','fm1']].plot(linewidth=LW)		
		df['m1dm2'].plot(color='red', secondary_y=True, linewidth=LW)	
		plt.title(com1 + '  ' +  com2 + '  '  + str(max(df['month'])))

		View_10X(plt, df, '321')
		#plt.xticks(range(0, df.index.size, df.index.size/10))
		
		#ax=plt.gca()  
		#size = df.index.size
		#d_size = size / 9
		#ax.set_xticklabels([df.iat[size-1,1], df.iat[size-1-d_size,1], df.iat[size-1-d_size*2,1], df.iat[size-1-d_size*3,1], df.iat[size-1-d_size*4,1], \
		#    df.iat[size-1-d_size*5,1],df.iat[size-1-d_size*6,1], df.iat[size-1-d_size*7,1], df.iat[size-1-d_size*8,1] ,df.iat[0,1]])

	plt.show()
	plt.close()	
Code example #24
File: sql.py Project: liudengfeng/zipline
def _get_report(only_A, table, columns=None, col='截止日期'):
    """
    获取财务报告数据
    
    使用利润表的公告日期
    """
    engine = get_engine('dataBrowse')
    df = pd.read_sql_table(table, engine, columns=columns)
    if only_A:
        df = df[~df.证券代码.str.startswith('2')]
        df = df[~df.证券代码.str.startswith('9')]
    # df.drop(to_drop, axis=1, inplace=True, errors='ignore')
    asof_dates = _financial_report_announcement_date()
    keys = ['证券代码', '截止日期']
    if col != '截止日期':
        # Handle industry-ranking data
        df['报告年度'] = df[col]
        # Rename the original data column to '截止日期' (reporting period end date)
        df.rename(columns={col: '截止日期'}, inplace=True)
    df = df.join(
        asof_dates.set_index(keys), on=keys
    )
    df.rename(columns={"证券代码": "sid",
                       "截止日期": "asof_date",
                       "公告日期": "timestamp"},
              inplace=True)
    # Fix the as-of dates
    _fill_ad_and_ts(df)
    # Normalize column names
    df.columns = df.columns.map(_normalized_col_name)
    df.sort_values(['sid', 'asof_date'], inplace=True)
    return df
Code example #25
File: routes.py Project: jakubdudek/pf_tracker
def index():
    holdings_list = []
    #try:
    tr_by_date_df=pd.read_sql_table('transaction_'+str(current_user.get_id()), db.engine, index_col='date')
    symbols=pf.get_symbols(tr_by_date_df)
        

    holdings_ts_list = pf.get_holdings(tr_by_date_df, symbols)
    holdings_df = pf.get_current_holdings(holdings_ts_list)
        
    cost_basis = pf.get_costbasis(tr_by_date_df)
        
    # add cost basis and realized gains
    holdings_df = holdings_df.join(cost_basis['basis'])
    holdings_df = holdings_df.join(cost_basis['realized'])
        
    #print(holdings_df)

    #    # turn into a list for datatables
    holdings_list = pf.df_to_obj_list(holdings_df, 'ticker')
    #print(holdings_list)

    #except:
    #    holdings_list =[]
    
    return render_template('portfolio/portfolio.html', holdings=holdings_list)
Code example #26
def correlation_analysis(dataset):
    # read
    tb = pd.read_sql_table(dataset, db, index_col = 'ID')
    X = tb.iloc[:, :-1]; y = tb.iloc[:, -1]
    # compute correlation
    X.drop(X.columns[X.var() < 1e-5], axis = 1, inplace = True)
    r = np.array([pearsonr(X.iloc[:, i], y) for i in range(X.shape[1])])
    rank = np.abs(r[:, 0]).argsort()[::-1]
    # plot top ones
    N = 9
    top = rank[:N]
    traces = []
    names = []
    for (i, c) in enumerate(X.columns[top]):
        names.append('{}<br>(r={:0.2g} p={:0.2g})'.format(
            c, r[top[i], 0], r[top[i], 1]))
        traces.append(go.Scatter(x = X[c].values.tolist(),
                                 y = y.values.tolist(),
                                 mode = 'markers',
                                 showlegend = False))
    fig = tools.make_subplots(rows = 3, cols = 3,
                              subplot_titles = names,
                              vertical_spacing = 0.1,
                              horizontal_spacing = 0.1)
    for (i, p) in enumerate(traces):
        fig.append_trace(p, i // 3 + 1, i % 3 + 1)
    fig['layout'].update(height = 700, width = 1100)
    fig['layout'].update(margin = go.Margin(l = 50, r = 50, b = 50,
                                            t = 50, pad = 0))
    for a in fig.layout.annotations:
        a['font'].update(size = 14)
    return (X.columns[rank], utils.plot_to_div(fig))
Code example #27
def get_params(dataset):
    tb = pd.read_sql_table(dataset, db, index_col = 'ID')
    X = tb.iloc[:, :-1]
    # remove low var columns
    low_var_cols = X.columns[X.var() < 1e-5]
    cols = [c for c in X.columns if c not in low_var_cols]
    return (cols, low_var_cols)
Code example #28
def readFromDB(table, dbConnect):
    engine = create_engine('mysql+mysqldb://' + mysql_user + ':' + mysql_pass + '@' + mysql_host + '/' + mysql_db)
    
    df = pd.read_sql_table(table, con=engine)
    #clean up SUBJ column
    #df.SUBJ = df.SUBJ.str.strip()
    return df
Code example #29
File: crunch.py Project: janusnic/shamebot
def crunch_data():
    engine = sq.create_engine("sqlite:///snapshots.sqlite")
    df = pd.read_sql_table("snapshots", engine)
    df = df.set_index(['datetime'])

    today = datetime.date.today()
    from_date = today - datetime.timedelta(weeks=1)
    #to_date = today - datetime.timedelta(weeks=1)
    to_date = today

    dframes = []
    for source, df in df.groupby(['source']):
        ts = df.loc[:, 'percent_women']
        ts = ts[ts > 0.0]
        rs = ts.resample("W", how={'median' : np.median})
        rs['week'] = rs.index.weekofyear
        rs = rs[from_date:to_date]
        rs.columns = [source, 'week']
        year = rs.index.year[0]
        week = rs.index.weekofyear[0]
        rs = rs.set_index('week')
        dframes.append(rs)

    df = pd.concat(dframes, axis=1, join='inner')

    return df.T, week, year
Code example #30
def test_writeSimlib():
    pkgDir = os.path.split(oss.__file__)[0]
    dbname = os.path.join(pkgDir, 'example_data', 'enigma_1189_micro.db')
    template_simlib = os.path.join(pkgDir, 'example_data',
                                   'Enigma_1189_micro_main.simlib')

    engineFile = 'sqlite:///' + dbname
    engine = create_engine(engineFile)

    # read the database into a `pd.DataFrame`
    Summary = pd.read_sql_table('Summary', engine)

    EnigmaMain = Summary.query('propID == [364]')
    EnigmaMainSummary = so.SummaryOpsim(EnigmaMain, calculateSNANASimlibs=True,
                                        user='******', host='time')
    simlibfilename = './Enigma_1189_micro_main.simlib'
    EnigmaMainSummary.writeSimlib(simlibfilename)

    with open(template_simlib) as f:
        template_data = f.read()
    with open(simlibfilename) as f:
        new_data = f.read()
    assert new_data == template_data
    if new_data == template_data :
        os.remove(simlibfilename)
Code example #31
def data_package(pkg_tables, pkg_skeleton,
                 out_dir=os.path.join(pudl.settings.PUDL_DIR,
                                      "results", "data_pkgs"),
                 testing=False):
    """
    Create a data package of requested tables and their dependencies.
    See Frictionless Data for the tabular data package specification:

    http://frictionlessdata.io/specs/tabular-data-package/

    Args:
        pkg_skeleton (dict): A python dictionary containing several
            top level elements of the data package JSON descriptor
            specific to the data package, including:
              * name: pudl-<datasource> e.g. pudl-eia923, pudl-ferc1
              * title: One line human readable description.
              * description: A paragraph long description.
              * keywords: For search purposes.
        pkg_tables (iterable): The names of database tables to include.
            Each one will be converted into a tabular data resource.
            Dependent tables will also be added to the data package.
        out_dir (path-like): The location of the packaging directory.
            The data package will be created in a subdirectory in
            this directory, according to the name of the package.

    Returns:
        data_pkg (Package): an object representing the data package,
            as defined by the datapackage library.
    """
    # A few paths we are going to need repeatedly:
    # out_dir is the packaging directory -- the place where packages end up
    # pkg_dir is the top level directory of this package:
    pkg_dir = os.path.abspath(os.path.join(out_dir, pkg_skeleton["name"]))
    # data_dir is the data directory within the package directory:
    data_dir = os.path.join(pkg_dir, "data")
    # pkg_json is the datapackage.json that we ultimately output:
    pkg_json = os.path.join(pkg_dir, "datapackage.json")

    # Given the list of target tables, find all dependent tables.
    all_tables = pudl.helpers.get_dependent_tables_from_list(
        pkg_tables, testing=testing)

    # Extract the target tables and save them as CSV files.
    # We have to do this before creating the data resources
    # because the files are necessary in order to calculate
    # the file sizes and hashes.
    for t in all_tables:
        csv_out = os.path.join(data_dir, f"{t}.csv")
        os.makedirs(os.path.dirname(csv_out), exist_ok=True)
        df = pd.read_sql_table(t, pudl.init.connect_db(testing=testing))
        if t in pudl.constants.need_fix_inting:
            df = pudl.helpers.fix_int_na(df, pudl.constants.need_fix_inting[t])
        logger.info(f"Exporting {t} to {csv_out}")
        df.to_csv(csv_out, index=False)

    # Create a tabular data resource for each of the tables.
    resources = []
    for t in all_tables:
        resources.append(
            pudl.output.export.get_tabular_data_resource(t, pkg_dir=pkg_dir))

    data_sources = pudl.helpers.data_sources_from_tables(
        all_tables, testing=testing)

    contributors = set()
    for src in data_sources:
        for c in pudl.constants.contributors_by_source[src]:
            contributors.add(c)

    pkg_descriptor = {
        "name": pkg_skeleton["name"],
        "profile": "tabular-data-package",
        "title": pkg_skeleton["title"],
        "description": pkg_skeleton["description"],
        "keywords": pkg_skeleton["keywords"],
        "homepage": "https://catalyst.coop/pudl/",
        "created": (datetime.datetime.utcnow().
                    replace(microsecond=0).isoformat() + 'Z'),
        "contributors": [pudl.constants.contributors[c] for c in contributors],
        "sources": [pudl.constants.data_sources[src] for src in data_sources],
        "licenses": [pudl.constants.licenses["cc-by-4.0"]],
        "resources": resources,
    }

    # Use that descriptor to instantiate a Package object
    data_pkg = datapackage.Package(pkg_descriptor)

    # Validate the data package descriptor before we go any further:
    if not data_pkg.valid:
        logger.warning(f"""
            Invalid tabular data package: {data_pkg.descriptor["name"]}
            Errors: {data_pkg.errors}""")

    data_pkg.save(pkg_json)

    # Validate the data within the package using goodtables:
    report = goodtables.validate(pkg_json, row_limit=100_000)
    if not report['valid']:
        logger.warning("Data package data validation failed.")

    return data_pkg
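
Note: a hedged sketch of how the function above might be called, following the docstring's description of pkg_skeleton (the table name is hypothetical):

pkg_skeleton = {
    "name": "pudl-eia923",
    "title": "PUDL EIA-923 tables",
    "description": "Selected EIA-923 tables packaged as tabular data resources.",
    "keywords": ["eia923", "electricity"],
}

data_pkg = data_package(
    pkg_tables=["generation_eia923"],  # hypothetical table name
    pkg_skeleton=pkg_skeleton,
    testing=True,
)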
Code example #32
def load_data(database_filepath):
    engine = create_engine('sqlite:///' + database_filepath)
    df = pd.read_sql_table('EAdescription', engine)
    return df
Code example #33
File: sql.py Project: tmct/dask
def read_sql_table(table,
                   uri,
                   index_col,
                   divisions=None,
                   npartitions=None,
                   limits=None,
                   columns=None,
                   bytes_per_chunk="256 MiB",
                   head_rows=5,
                   schema=None,
                   meta=None,
                   engine_kwargs=None,
                   **kwargs):
    """
    Create dataframe from an SQL table.

    If neither divisions nor npartitions is given, the memory footprint of the
    first few rows will be determined, and partitions of size ~256MB will
    be used.

    Parameters
    ----------
    table : string or sqlalchemy expression
        Select columns from here.
    uri : string
        Full sqlalchemy URI for the database connection
    index_col : string
        Column which becomes the index, and defines the partitioning. Should
        be a indexed column in the SQL server, and any orderable type. If the
        type is number or time, then partition boundaries can be inferred from
        npartitions or bytes_per_chunk; otherwide must supply explicit
        ``divisions=``.
        ``index_col`` could be a function to return a value, e.g.,
        ``sql.func.abs(sql.column('value')).label('abs(value)')``.
        ``index_col=sql.func.abs(sql.column("value")).label("abs(value)")``, or
        ``index_col=cast(sql.column("id"),types.BigInteger).label("id")`` to convert
        the textfield ``id`` to ``BigInteger``.

        Note that ``sql``, ``cast`` and ``types`` come from the ``sqlalchemy`` module.

        Labeling columns created by functions or arithmetic operations is
        required.
    divisions: sequence
        Values of the index column to split the table by. If given, this will
        override npartitions and bytes_per_chunk. The divisions are the value
        boundaries of the index column used to define the partitions. For
        example, ``divisions=list('acegikmoqsuwz')`` could be used to partition
        a string column lexicographically into 12 partitions, with the implicit
        assumption that each partition contains similar numbers of records.
    npartitions : int
        Number of partitions, if divisions is not given. Will split the values
        of the index column linearly between limits, if given, or the column
        max/min. The index column must be numeric or time for this to work
    limits: 2-tuple or None
        Manually give upper and lower range of values for use with npartitions;
        if None, first fetches max/min from the DB. Upper limit, if
        given, is inclusive.
    columns : list of strings or None
        Which columns to select; if None, gets all; can include sqlalchemy
        functions, e.g.,
        ``sql.func.abs(sql.column('value')).label('abs(value)')``.
        Labeling columns created by functions or arithmetic operations is
        recommended.
    bytes_per_chunk : str, int
        If both divisions and npartitions are None, this is the target size of
        each partition, in bytes
    head_rows : int
        How many rows to load for inferring the data-types, unless passing meta
    meta : empty DataFrame or None
        If provided, do not attempt to infer dtypes, but use these, coercing
        all chunks on load
    schema : str or None
        If using a table name, pass this to sqlalchemy to select which DB
        schema to use within the URI connection
    engine_kwargs : dict or None
        Specific db engine parameters for sqlalchemy
    kwargs : dict
        Additional parameters to pass to `pd.read_sql()`

    Returns
    -------
    dask.dataframe

    Examples
    --------
    >>> df = dd.read_sql_table('accounts', 'sqlite:///path/to/bank.db',
    ...                  npartitions=10, index_col='id')  # doctest: +SKIP
    """
    import sqlalchemy as sa
    from sqlalchemy import sql
    from sqlalchemy.sql import elements

    if index_col is None:
        raise ValueError("Must specify index column to partition on")
    engine_kwargs = {} if engine_kwargs is None else engine_kwargs
    engine = sa.create_engine(uri, **engine_kwargs)
    m = sa.MetaData()
    if isinstance(table, str):
        table = sa.Table(table,
                         m,
                         autoload=True,
                         autoload_with=engine,
                         schema=schema)

    index = table.columns[index_col] if isinstance(index_col,
                                                   str) else index_col
    if not isinstance(index_col, (str, elements.Label)):
        raise ValueError(
            "Use label when passing an SQLAlchemy instance as the index (%s)" %
            index)
    if divisions and npartitions:
        raise TypeError(
            "Must supply either divisions or npartitions, not both")

    columns = ([(table.columns[c] if isinstance(c, str) else c)
                for c in columns] if columns else list(table.columns))
    if index_col not in columns:
        columns.append(table.columns[index_col] if isinstance(index_col, str
                                                              ) else index_col)

    if isinstance(index_col, str):
        kwargs["index_col"] = index_col
    else:
        # function names get pandas auto-named
        kwargs["index_col"] = index_col.name

    if meta is None:
        # derive metadata from first few rows
        q = sql.select(columns).limit(head_rows).select_from(table)
        head = pd.read_sql(q, engine, **kwargs)

        if head.empty:
            # no results at all
            name = table.name
            schema = table.schema
            head = pd.read_sql_table(name,
                                     uri,
                                     schema=schema,
                                     index_col=index_col)
            return from_pandas(head, npartitions=1)

        bytes_per_row = (head.memory_usage(deep=True,
                                           index=True)).sum() / head_rows
        meta = head.iloc[:0]
    else:
        if divisions is None and npartitions is None:
            raise ValueError(
                "Must provide divisions or npartitions when using explicit meta."
            )

    if divisions is None:
        if limits is None:
            # calculate max and min for given index
            q = sql.select([sql.func.max(index),
                            sql.func.min(index)]).select_from(table)
            minmax = pd.read_sql(q, engine)
            maxi, mini = minmax.iloc[0]
            dtype = minmax.dtypes["max_1"]
        else:
            mini, maxi = limits
            dtype = pd.Series(limits).dtype

        if npartitions is None:
            q = sql.select([sql.func.count(index)]).select_from(table)
            count = pd.read_sql(q, engine)["count_1"][0]
            npartitions = (int(
                round(count * bytes_per_row /
                      dask.utils.parse_bytes(bytes_per_chunk))) or 1)
        if dtype.kind == "M":
            divisions = pd.date_range(
                start=mini,
                end=maxi,
                freq="%iS" % ((maxi - mini).total_seconds() / npartitions),
            ).tolist()
            divisions[0] = mini
            divisions[-1] = maxi
        elif dtype.kind in ["i", "u", "f"]:
            divisions = np.linspace(mini, maxi, npartitions + 1).tolist()
        else:
            raise TypeError(
                'Provided index column is of type "{}".  If divisions is not provided the '
                "index column type must be numeric or datetime.".format(dtype))

    parts = []
    lowers, uppers = divisions[:-1], divisions[1:]
    for i, (lower, upper) in enumerate(zip(lowers, uppers)):
        cond = index <= upper if i == len(lowers) - 1 else index < upper
        q = sql.select(columns).where(sql.and_(index >= lower,
                                               cond)).select_from(table)
        parts.append(
            delayed(_read_sql_chunk)(q,
                                     uri,
                                     meta,
                                     engine_kwargs=engine_kwargs,
                                     **kwargs))

    engine.dispose()

    return from_delayed(parts, meta, divisions=divisions)
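
Note: two hedged usage sketches following the signature defined above: one lets the function infer partitions on a numeric index, the other supplies explicit divisions for a string index as the docstring suggests (paths, table and column names are placeholders):

import dask.dataframe as dd

# Numeric index: split into 4 partitions between the column's min and max.
accounts = dd.read_sql_table("accounts", "sqlite:///path/to/bank.db",
                             index_col="id", npartitions=4)

# String index: explicit divisions define the partition boundaries.
names = dd.read_sql_table("accounts", "sqlite:///path/to/bank.db",
                          index_col="name",
                          divisions=list("acegikmoqsuwz"))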
Code example #34
app = Flask(__name__)

def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens

# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('Cleaned_Messages', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
    
Code example #35
import datetime
import sqlalchemy
import pandas as pd
import sqlite3
import numpy as np

PATH = 'my_file'

# Reads in databases from tasks 1 and 2
engine = sqlalchemy.create_engine('sqlite:///' + PATH)
all_data = pd.read_sql_table("TempAndCO2Log", engine)
all_data.to_csv("tester.csv")
Code example #36
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('messages_disaster', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():

    # extract data needed for visuals
    # Using the Genre's as provided and added in Top 10
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
Code example #37
File: run.py Project: ebrym/DisasterResponse
    
    # remove stop words
    stopwords_ = stopwords.words("english")
    words = [word for word in words if word not in stopwords_]
    
    # extract root form of words
    words = [WordNetLemmatizer().lemmatize(word, pos='v') for word in words]

    return words




# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('DisasterMessages', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
Code example #38
File: run.py Project: bobzhoumj/project2
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load dataDisasterResponse
engine = create_engine('sqlite:////home/workspace/models/DisasterResponse.db')
#engine = create_engine('sqlite:///.workspace/models/DisasterResponse.db')
df = pd.read_sql_table('DisasterResponse', engine)

# load model
model = joblib.load("/home/workspace/models/classifier.pickle")
#model = joblib.load("./models/classifier.pickle")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():

    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
Code example #39
def index(request):

    dests = Destination.objects.all()
    if request.user.is_authenticated:
        global df
        engine = create_engine(
            'postgresql+psycopg2://postgres:postgres@localhost:5432/telusko')
        df_d = pd.read_sql_table(
            "travello_destination",
            con=engine,
            schema='public',
            coerce_float=True,
            columns=['name', 'img', 'desc', 'state', 'city', 'typeofplace'])
        df = pd.DataFrame(df_d)
        geocoder = OpenCageGeocode('ea7fd5e689b149c38ef13cbed352bff5')
        list_lat = []
        list_long = []
        for index, row in df.iterrows():

            name = get_name_from_index(index, df)

            state = get_state_from_index(index, df)
            city = get_city_from_index(index, df)
            query = str(name) + ',' + str(city) + ',' + str(state)
            print("hi")
            results = geocoder.geocode(query)
            print('$$$$$$', results)
            if len(results) != 0:
                lat = results[0]['geometry']['lat']
                longi = results[0]['geometry']['lng']
            else:
                print("results is empty")
                lat = None
                longi = None

            print("hello", index, name, state)
            list_lat.append(lat)
            list_long.append(longi)
        df['lat'] = list_lat
        df['lon'] = list_long
        print(df)
        features = ['desc', 'state', 'typeofplace']
        for feature in features:
            df[feature] = df[feature].fillna('')

        df['combined_features'] = df.apply(combine_features, axis=1)
        cv = CountVectorizer()
        count_matrix = cv.fit_transform(df['combined_features'])
        cosine_sim = cosine_similarity(count_matrix)
        custom = CustomPreferences.objects.all()
        for c in custom:
            if str(c.user) == str(request.user):
                user_prefer = c.preferences
                user_prefer = user_prefer.split(",")
                rows_data = []
                for up in user_prefer:
                    place_index = get_index_from_title(up, df)
                    similar_places = list(enumerate(cosine_sim[place_index]))
                    sorted_similar_places = sorted(similar_places,
                                                   key=lambda x: x[1],
                                                   reverse=True)
                    i = 0
                    for place in sorted_similar_places:
                        row_data = get_title_from_index(place[0], df)
                        rows_data.append(row_data)
                        i = i + 1
                        if i > 3:
                            break
                final_data = []
                for dest in dests:
                    for lists in rows_data:
                        if dest.name in lists:
                            result = TextBlob(dest.desc)
                            polar = result.sentiment.polarity
                            if polar > 0.0:
                                final_data.append(dest)

    else:
        user_prefer = []
        final_data = []

    return render(request, "index.html", {
        'dests': dests,
        'recommendations': final_data
    })
Code example #40
    # Initialize lemmatizer
    lemmatizer = WordNetLemmatizer()

    # Lowercase, eliminate blank spaces and findin the root form of the words
    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok, pos='v').lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
try:
    engine = create_engine('sqlite:///../data/DisasterResponse.db')
    df = pd.read_sql_table('messages_categories', con=engine)
except:
    print(
        'If load data from database failed, try to run it from the app folder')

# load model
model = joblib.load("../models/classifier_model.pkl")


# Function for first plot
def first_plot(df):
    """Create first plot TOP 10 categories """

    # Define counts
    categories = df.drop(['id', 'message', 'original', 'genre'],
                         axis=1).sum().sort_values(ascending=False)
Code example #41
from wtforms import TextField, Form
import pandas as pd
from sqlalchemy import create_engine
from global_parameters import *

# assign values from global_parameters.py to local variables
# number_movies_returned - how many most similar movies output when requested for each movie
number_movies_returned = global_number_movies_returned
# database_filepath - name of the sqlalchemy database file where recommendations are stored
database_filepath = global_database_filepath

app = Flask(__name__)

# extract movies data
engine = create_engine('sqlite:///' + database_filepath)
movies_data = pd.read_sql_table('Closest_movies', engine)

engine.dispose()

# get the movie titles
movie_titles = list(movies_data['movie_title'])


# SearchForm class will allow us to have autocomplete feature
class SearchForm(Form):
    movie_autocomplete = TextField('Movie name', id='movie_autocomplete')


@app.route('/autocomplete', methods=['GET', 'POST'])
def autocomplete():
    '''
Code example #42
# -*- coding: utf-8 -*-

###############################################################################
#######################          Main text code        #######################
###############################################################################

# Code 4-12
from sqlalchemy import create_engine
import pandas as pd
## Create the database connection
engine = create_engine('mysql+pymysql://root:[email protected]:\
3306/testdb?charset=utf8')
detail= pd.read_sql_table('meal_order_detail1',con = engine)
print('Index of the order detail table:', detail.index)

print('Values of the order detail table:','\n', detail.values)
print('Column names of the order detail table:','\n', detail.columns)
print('Data types of the order detail table:','\n', detail.dtypes)


# Code 4-13
## Check the number of elements in the DataFrame
print('Number of elements in the order detail table:', detail.size)
print('Number of dimensions of the order detail table:', detail.ndim) ## Check the number of dimensions
print('Shape of the order detail table:', detail.shape) ## Check the shape of the DataFrame



# Code 4-14
print('Shape of the order detail table before transposing:',detail.shape)
print('Shape of the order detail table after transposing:',detail.T.shape)
Code example #43
                        help='obsHistID to generate InstanceCatalog for')
    parser.add_argument('--sne_truth_cat', type=str,
                        help='path to lensed AGN truth catalog')
    parser.add_argument('--output_dir', type=str,
                        help='output directory for catalog and sed folder')
    parser.add_argument('--cat_file_name', type=str,
                        help='filename of instance catalog written')
    parser.add_argument('--sed_folder', type=str,
                        help='directory to put SNe SEDs. Will appear in output_dir.')

    args = parser.parse_args()

    obs_gen = ObservationMetaDataGenerator(database=args.obs_db,
                                           driver='sqlite')

    sne_truth_db = create_engine('sqlite:///%s' % args.sne_truth_cat, echo=False)
    sne_truth_cat = pd.read_sql_table('lensed_sne', sne_truth_db)
    lensed_sne_ic = lensedSneCat(sne_truth_cat, args.output_dir,
                                 args.cat_file_name, args.sed_folder)

    obs_md = get_obs_md(obs_gen, args.obs_id, 2, dither=True)
    print(obs_md.mjd.TAI)
    for obs_time in np.arange(obs_md.mjd.TAI, obs_md.mjd.TAI + 35.1, 0.25):
        obs_filter = obs_md.bandpass
        print('Writing Instance Catalog for Visit: %i at MJD: %f in Bandpass: %s' % (args.obs_id,
                                                                                     obs_time,
                                                                                     obs_filter))
        add_to_cat_idx, sne_magnorms, sne_sed_names = lensed_sne_ic.calc_sne_mags(obs_time, obs_filter)
        lensed_sne_ic.output_instance_catalog(add_to_cat_idx, sne_magnorms,
                                              sne_sed_names, obs_md,
                                              'test_cat_%.4f' % obs_time)
コード例 #44
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('df', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():

    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
コード例 #45
def application (environ, start_response):

    session = get_database_session(engine)

    json_c_type = "application/json"

    paths = [p for p in environ['PATH_INFO'].split('/') if p != '']

    headers = []
    origin = environ.get("HTTP_ORIGIN")
    method = environ['REQUEST_METHOD']

    response_factory = ResponseFactory()

    # /
    if len(paths) == 0:

      payload = {'message': "Server works!"}

      return response_factory \
              .create(200) \
              .get_response(payload, start_response)

    else:
      # /players/
      if paths[0] == 'players':

        # GET: /players/
        if method == 'GET':

          # GET: /players/
          if len(paths) == 1:
            try:
              # query extract
              query = parse_qs(environ['QUERY_STRING'])
              name = query.get('name', [''])[0]
              club = query.get('club', [''])[0]
              nationality = query.get('nationality', [''])[0]
              limit = int(query.get('limit', [10])[0])
              skip = int(query.get('skip', [0])[0])

              rows_all = session.query(Player)

              if name != '':
                rows_all = rows_all.filter(Player.name.ilike(f'%{name}%'))
              if club != '':
                rows_all = rows_all.filter(Player.club.ilike(f'%{club}%'))
              if nationality != '':
                rows_all = rows_all.filter(Player.nationality.ilike(f'%{nationality}%'))

              rows_all = rows_all.order_by(Player.overall.desc(), Player.value.desc())

              rows =  rows_all \
                        .offset(skip) \
                        .limit(limit)

              # result set
              records = [dict(id=r.id, name=r.name, position=r.position, nationality=r.nationality,
                              flag=r.flag, club=r.club, age=r.age, photo=r.photo,
                              value=r.value, overall=r.overall )
                        for r in rows]

              payload = {
                'count': rows_all.count(),
                'results': records
              }

              return response_factory \
                      .create(200, origin, method) \
                      .get_response(payload, start_response)

            except Exception as e:

              payload = {'message': f"Oops! Something went wrong! => {e}"}

              return response_factory \
                      .create(500) \
                      .get_response(payload, start_response)


          # GET: /players/...
          else:

            payload = {'message': f"You are not allowed to make request to `{environ['PATH_INFO']}`!"}

            return response_factory \
                    .create(403) \
                    .get_response(payload, start_response)

        # POST: /players/
        elif method == 'POST':

          # POST: /players/team/
          if len(paths) == 2 and paths[1] == 'team':

            length = int(environ.get('CONTENT_LENGTH', '0'))
            request_body = environ['wsgi.input'].read(length)
            body = json.loads(request_body)
            # verify
            formation = body.get('formation', None)
            budget = int(body.get('budget', 0))
            include_free_agents = body.get('include_free_agents', True)

            if formation and all([p in POSITIONS for p in formation]) and budget >= 1 * 10**6:

              try:

                formation = [ f.lower() for f in formation]

                temp = pd.read_sql_table('players', con=engine)[['id', 'position', 'value', 'overall']] \
                        .dropna(subset=['position']) \
                        .query(f'position in {formation}')

                temp = temp if include_free_agents else temp.query('value > 0')

                prob, ids = compute_best_lineup(temp, formation, budget)

                rows = session.query(Player).filter(Player.id.in_(ids))

                records = { r.position: dict(id=r.id, name=r.name, position=r.position, nationality=r.nationality,
                                             flag=r.flag, club=r.club, age=r.age, photo=r.photo,
                                              value=r.value, overall=r.overall )
                            for r in rows}

                payload = {
                  'total_overall': sum([r.overall for r in rows]),
                  'total_value': sum([r.value for r in rows]),
                  'formation': formation,
                  'results': records
                }

                return response_factory \
                        .create(200, origin, method) \
                        .get_response(payload, start_response)

              except Exception as e:

                payload = {'message': f"Oops! Something went wrong! => {e}"}

                return response_factory \
                        .create(500) \
                        .get_response(payload, start_response)


            else:

              payload = {
                'message': f"Please select 11 unique and valid positions and set budget greater than € 1,000,000!",
                'formation': formation,
                'budget': budget
              }

              return response_factory \
                        .create(400) \
                        .get_response(payload, start_response)

          # POST: /players/...
          else:

            payload = {'message': f"You are not allowed to make request to `{environ['PATH_INFO']}`!"}

            return response_factory \
                    .create(403) \
                    .get_response(payload, start_response)

        # HEAD/OPTIONS: /players/
        elif method in ('HEAD', 'OPTIONS'):

          return response_factory \
                    .create(200, origin, method) \
                    .get_response(None, start_response)

        # PUT/DELETE/PATCH: /players/
        else:

          payload = {'message': f"You are not allowed to make request with {environ['REQUEST_METHOD']} to `{environ['PATH_INFO']}`!"}

          return response_factory \
                  .create(405) \
                  .get_response(payload, start_response)


      # /assets/
      elif paths[0] == 'assets':

        if method == 'GET':
          status = '200 OK'

          try:
            with open(f"./{environ['PATH_INFO']}", "rb") as f:
              img = f.read()
              size = stat(f"./{environ['PATH_INFO']}").st_size

            # serve the placeholder image when the requested file is empty
            if size == 0:
              with open(f"./assets/players/000000.png", "rb") as f:
                img = f.read()

            # start the response only after the image bytes are in hand, so the
            # except branch below can still send its own 404 headers
            headers.append(('Content-Type', 'image/png'))
            start_response(status, headers)

            return [img]

          except Exception as e:

            payload = {'message': f"Image is not available! => {e}"}

            return response_factory \
                    .create(404) \
                    .get_response(payload, start_response)

        # non-GET methods on /assets/ are not allowed
        else:

          payload = {'message': f"You are not allowed to make request with {environ['REQUEST_METHOD']} to `{environ['PATH_INFO']}`!"}

          return response_factory \
                  .create(405) \
                  .get_response(payload, start_response)

      # /*
      else:

        payload = {'message': f"You are not allowed to make request to `{environ['PATH_INFO']}`!"}

        return response_factory \
                .create(403) \
                .get_response(payload, start_response)
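The WSGI handler above can be exercised without a real server by hand-building an environ. This is an added sketch (the helper name and defaults are mine), assuming the module exposes the `application` callable defined above and that it returns an iterable of response chunks:

# Added sketch: drive the WSGI app above with a synthetic request, no server needed.
from wsgiref.util import setup_testing_defaults

def call_wsgi(wsgi_app, path='/players/', query='limit=5', method='GET'):
    environ = {}
    setup_testing_defaults(environ)        # fills in the mandatory WSGI keys
    environ['PATH_INFO'] = path
    environ['QUERY_STRING'] = query
    environ['REQUEST_METHOD'] = method
    captured = {}

    def start_response(status, headers):
        captured['status'], captured['headers'] = status, headers

    chunks = list(wsgi_app(environ, start_response))
    return captured.get('status'), chunks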
コード例 #46
    'mssql://LAPTOP-TH3PDN0I/Group_8_DB?driver=ODBC+Driver+17+for+SQL+Server')
print("Connected.")

csvfile = '../SPARC_10k_part-ce.csv'
print("File name to load: " + csvfile)

################################################################################
#
# Job Step 10: Load Dimensions
#
################################################################################
jobutils.printStepStart("10")

print("Loading dimensions to memory...")

dim_date_df = pd.read_sql_table('DimDate', con=engine).fillna('')
dim_location_df = pd.read_sql_table('DimLocation', con=engine).fillna('')
dim_demographics_df = pd.read_sql_table('DimDemographics',
                                        con=engine).fillna('')
dim_payment_df = pd.read_sql_table('DimPayment', con=engine).fillna('')
dim_clinic_class_df = pd.read_sql_table('DimClinicClass',
                                        con=engine).fillna('')
dim_apr_class_df = pd.read_sql_table('DimAPRClassification',
                                     con=engine).fillna('')
dim_admission_df = pd.read_sql_table('DimAdmission', con=engine).fillna('')
dim_provider_df = pd.read_sql_table('DimProvider', con=engine).fillna('')

print("Done. Dimensions loaded.")

################################################################################
#
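The dimension frames loaded above are typically used to resolve surrogate keys while the fact rows are staged. The following is a purely hypothetical sketch of that lookup, not part of the original job; the real schema is not shown in this excerpt, and `DateKey`, `FullDate`, and `DischargeDate` are invented column names:

# Hypothetical sketch: attach the DimDate surrogate key to staged fact rows.
fact_stage_df = pd.read_csv(csvfile)
fact_stage_df = fact_stage_df.merge(
    dim_date_df[['DateKey', 'FullDate']],   # assumed dimension columns
    how='left',
    left_on='DischargeDate',                # assumed natural key in the CSV
    right_on='FullDate')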
コード例 #47
ファイル: sql.py プロジェクト: JacobGreen770/Pancham
 def time_read_sql_table_column(self, dtype):
     read_sql_table(self.table_name, self.con, columns=[dtype])
コード例 #48
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('DisasterRis', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():

    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
コード例 #49

def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for token in tokens:
        clean_token = lemmatizer.lemmatize(token).lower().strip()
        clean_tokens.append(clean_token)
    return clean_tokens


# load data
engine = create_engine('sqlite:///./data/DisasterResponse.db')
df = pd.read_sql_table('Disaster_Response_ETL', engine)

# load model
model = joblib.load("./models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():

    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
コード例 #50
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('DataFrame', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():

    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)
    category_counts = df.iloc[:, 4:].sum().sort_values(ascending=False)[1:6]
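The aggregates above are usually packed into Plotly graph dictionaries and handed to the Flask template. A minimal sketch of that step, assuming plotly is installed (titles and layout are mine, not the repository's):

# Sketch: wrap the counts computed above in Plotly bar charts for render_template.
from plotly.graph_objs import Bar

graphs = [
    {'data': [Bar(x=genre_names, y=list(genre_counts))],
     'layout': {'title': 'Distribution of Message Genres',
                'xaxis': {'title': 'Genre'}, 'yaxis': {'title': 'Count'}}},
    {'data': [Bar(x=list(category_counts.index), y=list(category_counts))],
     'layout': {'title': 'Most Frequent Categories (excluding the largest)',
                'yaxis': {'title': 'Count'}}},
]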
コード例 #51
def start_flow():

    job_id = admin_api.start_job()

    if not job_id:
        current_app.logger.info('Failed to get job_id')
        job_outcome = 'busy'

    else:
        log_db.log_exec_status(job_id, 'start_flow', 'executing', '')

        file_path_list = os.listdir(CURRENT_SOURCE_FILES_PATH)

        if file_path_list:
            with engine.connect() as connection:
                Base.metadata.create_all(connection)

                # Get previous version of pdp_contacts table, which is used later to classify new records
                pdp_contacts_df = pd.read_sql_table('pdp_contacts', connection)
                pdp_contacts_df = pdp_contacts_df[
                    pdp_contacts_df["archived_date"].isnull()]
                pdp_contacts_df = pdp_contacts_df.drop(columns=[
                    'archived_date', 'created_date', '_id', 'matching_id'
                ])

                current_app.logger.info(
                    'Loaded {} records from pdp_contacts table'.format(
                        pdp_contacts_df.shape[0]))

                # Clean the input data and normalize/rename columns
                # Populate new records in secondary tables (donations, volunteer shifts)
                # input - existing files in path
                # output - normalized object of all entries, as well as the input json rows for primary sources
                log_db.log_exec_status(job_id, 'clean_and_load', 'executing',
                                       '')
                normalized_data, source_json, manual_matches_df = clean_and_load_data.start(
                    connection, pdp_contacts_df, file_path_list)

                # Standardize column data types via postgres (e.g. reading a csv column as int vs. str)
                # (If additional inconsistencies are encountered, may need to enforce the schema of
                # the contacts loader by initializing it from pdp_contacts.)
                normalized_data.to_sql('_temp_pdp_contacts_loader',
                                       connection,
                                       index=False,
                                       if_exists='replace')
                normalized_data = pd.read_sql_table(
                    '_temp_pdp_contacts_loader', connection)

                # Classify rows into unchanged, updated, and new rows, compared to the existing state of the DB
                log_db.log_exec_status(job_id, 'classify', 'executing', '')
                rows_classified = calssify_new_data.start(
                    pdp_contacts_df, normalized_data)

                # Archive rows that were updated in the current state of the DB (set their archived_date to now)
                archive_rows.archive(connection, rows_classified["updated"])

                # Match new+updated records against previous version of pdp_contacts database, and
                # write these rows to the database.
                match_data.start(connection, rows_classified,
                                 manual_matches_df, job_id)

                # Copy raw input rows to json fields in pdp_contacts,
                # using a temporary table to simplify the update code.
                current_app.logger.info(
                    'Saving json of original rows to pdp_contacts')
                source_json.to_sql('_temp_pdp_contacts_loader',
                                   connection,
                                   index=False,
                                   if_exists='replace')
                # https://www.postgresql.org/docs/8.4/sql-update.html
                connection.execute('''
                    UPDATE pdp_contacts pdp
                    SET json = to_json(temp.json)
                    FROM _temp_pdp_contacts_loader temp
                    WHERE
                        pdp.source_type = temp.source_type AND
                        pdp.source_id = temp.source_id AND
                        pdp.archived_date IS NULL
                ''')

            current_app.logger.info('Finished flow script run')
            job_outcome = 'completed'

        else:  # No files in list
            current_app.logger.info('No files to process')
            job_outcome = 'nothing to do'

        log_db.log_exec_status(job_id, 'flow', 'complete', '')

    return job_outcome
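The round trip through `_temp_pdp_contacts_loader` above is what lets the database, rather than pandas, settle the column types. The same trick in isolation looks roughly like this (an added sketch, assuming an open SQLAlchemy `connection`):

# Sketch: standardize a DataFrame's dtypes by bouncing it off a throwaway table.
import pandas as pd

def standardize_dtypes(df, connection, temp_table='_temp_dtype_loader'):
    df.to_sql(temp_table, connection, index=False, if_exists='replace')
    return pd.read_sql_table(temp_table, connection)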
コード例 #52
ファイル: sql.py プロジェクト: JacobGreen770/Pancham
 def time_read_sql_table_all(self):
     read_sql_table(self.table_name, self.con)
コード例 #53
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('messages', engine)

# load model
model = joblib.load("../models/classifier.joblib")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    num_categories = df[[
コード例 #54
def extract_array(fname,
                  param_names,
                  result_name="result",
                  non_existing=np.nan,
                  redux_funs=[np.nanmean],
                  return_param_values=True,
                  conditionals={},
                  db_is_sqlite=False):
    """
    Given a database file (as e.g. product by FireAndForgetJob, extraxts an
    array where each dimension corresponds to a provided parameter, and
    each element is a redux (e.g. mean) of all results (of given same)
    for the parameter combinations.
    An optional set of additional conditions can be specified.
    
    Database file can be csv or sqlite.
    
    Empty parameter names lead to just aggregating (sliced by conditionals) the results.
    
    A default value can be specified.
    """
    if db_is_sqlite:
        from sqlalchemy import create_engine
        import sqlalchemy as sa
        engine = create_engine('sqlite:///{}'.format(fname))
        df = pd.read_sql_table("FireAndForgetJob", engine)
    else:
        with open(fname) as f:
            df = pd.read_csv(f, error_bad_lines=False, warn_bad_lines=False)

    for k, v in conditionals.items():
        df = df.loc[df[k] == v]
        if k in param_names:
            param_names.remove(k)

    # no parameter names means just return the aggregated values for the (sliced) result
    if len(param_names) == 0:
        return np.array([redux(df[result_name]) for redux in redux_funs])

    param_values = {
        param_name: np.sort(df[param_name].dropna().unique())
        for param_name in param_names
    }

    sizes = [len(param_values[param_name]) for param_name in param_names]
    results = [np.zeros(tuple(sizes)) + non_existing for _ in redux_funs]

    # compute aggregate for each unique appearance of all parameters;
    # keep the parameter values as the index so combinations can be looked up below
    redux = df.groupby(param_names)[result_name].agg(redux_funs)

    # since not all parameter combinations might be computed, iterate and pull out computed ones
    all_combs = itertools.product(
        *[param_values[param_name] for param_name in param_names])

    for index, comb in enumerate(all_combs):
        # one element tuples should be the value itself
        if len(comb) == 1:
            comb = comb[0]

        result_ind = np.unravel_index(index, tuple(sizes))

        # parameter combination was computed
        if comb in redux.index:
            # extract results and put them in the right place
            for i, redux_fun in enumerate(redux_funs):
                results[i][result_ind] = redux.loc[comb][redux_fun.__name__]

    if not return_param_values:
        return results
    else:
        return results, param_values
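A hypothetical call for illustration, assuming an sqlite results database with parameter columns `alpha` and `beta`, a `seed` column, and the default `result` column (all of these names are assumptions, not from the source):

# Sketch: mean and standard deviation of `result` over every (alpha, beta)
# combination, restricted to runs with seed == 0.
import numpy as np

(means, stds), values = extract_array(
    'results.sqlite',
    param_names=['alpha', 'beta'],
    redux_funs=[np.nanmean, np.nanstd],
    conditionals={'seed': 0},
    db_is_sqlite=True)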
コード例 #55
app = Flask(__name__)

def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens

# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('data/DisasterResponse_table', engine)


# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')

def index():
    
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
コード例 #56
def load_from_mysql(table_name: str):
    """读取远程mysql数据表"""
    LOG.logger_font.info(msg=f"Reading mysql table {table_name}")
    table = pd.read_sql_table(con=RemoteMySQLConfig.engine,
                              table_name=f"{table_name}")
    return table
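For illustration only, a call could look like this (the table name is borrowed from another example in this listing, not from the original project):

detail = load_from_mysql('meal_order_detail1')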
コード例 #57
def tokenize(text):
    tokens = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
engine = create_engine('sqlite:///../data.db')
df = pd.read_sql_table('data', engine)

# load model
model = joblib.load("../classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():

    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    percent_request = 100 * df.groupby('genre').sum()['request'] / (
        df.groupby('genre').count()['message'])
コード例 #58
import pandas as pd
from sqlalchemy import create_engine
engine = create_engine(
    'postgresql://*****:*****@localhost:5432/xueshandai')
ip = pd.read_sql_table('ip_to_map_province', engine)


def get_ip_value(ip):
    ip2 = ip.split('.')
    ipv = int(ip2[0]) * 256**3 + int(ip2[1]) * 256**2 + int(
        ip2[2]) * 256 + int(ip2[3])
    return ipv


def findcountryprovince_from_db(ip):
    #     print("ip:",ip)
    ipv = get_ip_value(ip)
    sql = 'select * from ip_store2 where ipv1<=' + str(
        ipv) + ' and ipv2>=' + str(ipv)
    #     print("sql:",sql)
    data = pd.read_sql_query(sql, engine)
    if data.size > 0:
        return data.loc[0, 'province']
    else:
        return 'unknown'


ip['province'] = ''

for k in ip.index:
    #     print(k,ip.loc[k,'ip'])
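A quick sanity check of the dotted-quad arithmetic above (added for illustration, not from the original script):

# 1*256**3 + 2*256**2 + 3*256 + 4 = 16777216 + 131072 + 768 + 4 = 16909060
assert get_ip_value('1.2.3.4') == 16909060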
コード例 #59
POSTGRES_PORT = 5432
POSTGRES_USERNAME = '******'
POSTGRES_PASSWORD = db_password
POSTGRES_DBNAME = 'us_gun_violence'

# In[4]:

# create connection string and database engine
db_string = f'postgres://{POSTGRES_USERNAME}:{POSTGRES_PASSWORD}@{POSTGRES_ADDRESS}:{POSTGRES_PORT}/{POSTGRES_DBNAME}'

engine = create_engine(db_string)

# In[5]:

# import transformed suspects dataframe
suspects_df = pd.read_sql_table('suspects_ml_transformed', engine)
suspects_df.head()

# In[6]:

# import incidents dataframe
incidents_df = pd.read_sql_table('incidents', engine)
incidents_df.head()

# ## Preprocess Data

# In[7]:

# combine suspect and incident data
suspects_incidents_df = suspects_df.merge(incidents_df,
                                          how='left',
コード例 #60
# creating lists of the original names, new names, and keys for the database migration
old_table_names = [
    'armory_item', 'armory_weapon', 'charactercreator_character',
    'charactercreator_character_inventory', 'charactercreator_cleric',
    'charactercreator_fighter', 'charactercreator_mage',
    'charactercreator_necromancer', 'charactercreator_thief'
]

new_table_names = [
    'items', 'weapons', 'characters', 'inventories', 'clerics', 'fighters',
    'mages', 'necromancers', 'thieves'
]

keys = [
    'item_id', 'item_ptr_id', 'character_id', 'id', 'character_ptr_id',
    'character_ptr_id', 'character_ptr_id', 'mage_ptr_id', 'character_ptr_id'
]

for i in range(len(old_table_names)):
    table = sq_curs.execute(""" select * from {}""".format(old_table_names[i]))
    table_df = pd.read_sql_table('{}'.format(old_table_names[i]), sq_engine)
    table_df.set_index(keys[i], inplace=True)
    if len(table_df) > 0:
        table_df.to_sql('{}'.format(new_table_names[i]),
                        engine,
                        if_exists='replace')

pg_conn.close()
sq_con.close()
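An optional follow-up check one might add, assuming the two engines are still usable after the raw connections are closed and bearing in mind that empty source tables were skipped above (this is an added sketch, not part of the original script):

# Sketch: compare row counts between each source table and its migrated copy.
for old_name, new_name in zip(old_table_names, new_table_names):
    src_rows = len(pd.read_sql_table(old_name, sq_engine))
    try:
        dst_rows = len(pd.read_sql_table(new_name, engine))
    except ValueError:
        dst_rows = 0  # empty source tables were skipped, so no target table exists
    print(f'{old_name} -> {new_name}: {src_rows} source rows, {dst_rows} migrated rows')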