Example #1
def write_table(df, tablename, conn):
    """ Write a dataframe to the database. """
    with catch_warnings():
        filterwarnings('ignore',
                       message='The provided table name \'%s\' is not found exactly as such in the database' % tablename)
        to_sql(df, name=tablename, con=conn,
               index=not any(name is None for name in df.index.names))  # load index into db if all levels are named
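Note the index= expression above: the index is written to the database only when every level of the (possibly multi-level) index is named. A minimal standalone sketch of the same rule using the modern DataFrame.to_sql API, assuming an in-memory SQLite connection:

import sqlite3
import pandas as pd

conn = sqlite3.connect(':memory:')
df = pd.DataFrame({'price': [1.0, 2.0]},
                  index=pd.Index(['a', 'b'], name='ticker'))
# every index level is named, so the expression evaluates to True
# and 'ticker' is stored as a column in the table
df.to_sql('prices', conn, index=not any(n is None for n in df.index.names))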
Example #2
def write_to_db(df, name):
    # TODO: float with nan not being handled

    # HACK:
    sql._SQL_TYPES['text']['mysql'] = 'VARCHAR (255)'

    sql.to_sql(df.where(pd.notnull(df), None), con=con, name=name, flavor='mysql', if_exists='replace', index="id")
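The df.where(pd.notnull(df), None) call swaps missing values for None so they reach the database as proper NULLs. A standalone sketch of just that conversion (object-dtype columns keep the None; float columns may coerce it back to NaN):

import numpy as np
import pandas as pd

df = pd.DataFrame({'a': ['x', np.nan], 'b': [1.0, None]})
cleaned = df.where(pd.notnull(df), None)  # missing cells become None where the dtype allows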
Example #3
def download_dd_data(start=None):
    '''
    Fetch big-deal (large-order) data.
    '''
    conn = db.get_dd_data_db()
    if start is None:
        start = utils.today_last_year(1)
    for code in get_all_stock_code():
        day = start
        end = datetime.today().date()
        while day < end:
            date = day.strftime('%Y-%m-%d')
            df = ts.get_sina_dd(code=code, date=date, vol=500)
            if df is not None:
                df.insert(0, 'code', code)
                try:
                    sql.to_sql(df, name='dd_data', con=conn,
                               index=True, if_exists='append')
                    log.info('%s,%s dd data download ok.' % (code, date))
                except Exception:
                    log.error('download error:%s,%s' % (code, date))
            day = day + timedelta(days=1)
Example #4
    def test_safe_names_warning(self):
        # GH 6798
        df = DataFrame([[1, 2], [3, 4]], columns=['a', 'b '])  # has a space
        # warns on create table with spaces in names
        with tm.assert_produces_warning():
            sql.to_sql(df, "test_frame3_legacy", self.conn,
                       flavor="sqlite", index=False)
Example #5
    def test_sql_open_close(self):
        """
        Test if the IO in the database still work if the connection
        is closed between the writing and reading (as in many real
        situations).
        """

        self._load_test2_data()

        with tm.ensure_clean() as name:

            conn = self.connect(name)

            sql.to_sql(
                self.test_frame2,
                "test_frame2_legacy",
                conn,
                flavor="sqlite",
                index=False,
            )

            conn.close()
            conn = self.connect(name)

            result = sql.read_sql(
                "SELECT * FROM test_frame2_legacy;",
                conn,
                flavor="sqlite",
            )

            conn.close()

        tm.assert_frame_equal(self.test_frame2, result)
Example #6
def download_money_flow_data(num=1000):
    '''
    get money flow from sina finance
    :param num:
    :return:
    '''
    conn = db.get_money_flow_db()
    cur_time = datetime.now()
    for code in get_all_stock_code():
        code = append_stock_perfix(code)
        cost = datetime.now()
        for i in range(retry):
            try:
                url = sina_money_flow_api % (num, code)
                text = request.urlopen(url, timeout=10).read()
                text = text.decode('GBK')
                log.info('url=%s,size=%d, try=%d' % (url, len(text), i))
                if len(text) < 10:
                    continue
                # j = demjson.decode(text, 'utf-8')  # demjson is very slow when the JSON is large
                text = text[2:-2]
                j = text.replace('"', '').split('},{')
                head = ['date', 'close', 'chg_r', 'turnover', 'netamount', 'ratio', 'zl_netamount', 'zl_ratio',
                        'cat_ratio']
                # date, close, change ratio, turnover, net inflow, net inflow ratio,
                # main-force net inflow, main-force net inflow ratio, sector net inflow ratio
                data = []

                for x in j:
                    m = {}
                    for s in x.split(','):
                        k, v = s.split(':')
                        if '-' == v or 'null' == v:
                            v = '0.0'
                        m[k] = v
                    date = m['opendate']
                    close = float(m['trade'])
                    chg_r = float(m['changeratio'])
                    turnover = float(m['turnover']) / 10000
                    netamount = float(m['netamount']) / 10000
                    ratio = float(m['ratioamount'])
                    zl_netamount = float(m['r0_net']) / 10000
                    zl_ratio = float(m['r0_ratio'])
                    cat_ratio = float(m['cate_ra'])
                    data.append([date, close, chg_r, turnover, netamount, ratio, zl_netamount, zl_ratio, cat_ratio])

                df = pd.DataFrame(data, columns=head)
                log.info('data ok')
                if not df.empty:
                    df.insert(1, 'code', code)
                    sql.to_sql(df, name='money_flow', con=conn, index=False, if_exists='append')
                    log.info('%s,%d money flow data download ok.' % (code, len(df)))
                    break
            except Exception as e:
                log.error('error:code=%s,msg=%s' % (code, e))
        log.debug('%s,costs:%d s' % (code, (datetime.now() - cost).seconds))
    conn.close()
    log.info('money flow data download complete. cost %d s' % (datetime.now() - cur_time).seconds)
Example #7
def CleanReferral():
    print("Joining referral data to LA City zip codes to filter referrals within LA City.")
    # Join DCFS and LA City zip code by zip code
    # To obtain referrals made within LA City (DCFS referral data is originally county-level)
    conn = sq.connect('ReferralCrimeMap.db')
    cur = conn.cursor()
    query = '''
            SELECT *
            FROM dcfs
            JOIN LAzip
            ON dcfs.ref_zip = LAzip.zipcode
            '''
    dcfs_all = pd.read_sql(query, conn)
    conn.close()

    # Select the referrals received in the focal year 2016
    print("Select the referrals received in focal year 2016.")
    dcfs_all['ref_year'] = dcfs_all['ref_date'].map(lambda x: x.split('-')[0])
    dcfs_all['ref_month'] = dcfs_all['ref_date'].map(lambda x: x.split('-')[1])
    dcfs_all.rename(columns={'prime_key':'zip_ID'}, inplace=True)
    dcfs16 = dcfs_all[dcfs_all['ref_year']=='2016']
    
    # Save the data into ReferralCrimeMap.db
    print("Saving Referral_LAcity2016 to ReferralCrimeMap.db")
    conn = sq.connect('ReferralCrimeMap.db')
    pd_sql.to_sql(dcfs16, 'Referral_LAcity2016', conn, if_exists='replace', index=False)
    conn.close()
Example #8
def daily(now):
    import pandas
    import datetime
    from sql import getCursor, insertStock, getE
    from stock import oneStock, getStockList
    import pandas.io.sql as pdsql
    before = now - datetime.timedelta(700)
    stock_list = getStockList()
    total = len(stock_list)
    count = 1
    for v in stock_list:
        print str(count) + '/' + str(total), 'start', v
        try:
            df = oneStock(v, starttime=before, endtime=now)
            df = df.iloc[[-1]]
            eng = getE()
            df = df.replace({float('inf'): 0, float('nan'): 0})
            pdsql.to_sql(df, 'today', eng, if_exists='append', index=False, index_label='stock_id')
        except Exception as e:
            count += 1
            print e
            print v, 'error'
            continue
        print str(count) + '/' + str(total), 'end', v
        count += 1

    return
Example #9
    def generate_sqlite(self):

        print "Converting csv file to sqlite for train set:"
        num_lines = sum(1 for line in open(self.folder+self.file_in))
        columns = ['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
                   'TIMESTAMP', 'DAYTYPE', 'MISSING_DATA', 'POLYLINE']

        con = sqlite3.connect(self.folder+self.file_out)
        chunk_size = 5000
        count = 1

        for i in range(0, num_lines, chunk_size):

            df = pd.read_csv(self.folder+self.file_in, header=None,
                             nrows=chunk_size, skiprows=i, low_memory=False)
            df.columns = columns
            sql.to_sql(df, name='train_data', con=con, index=False,
                       index_label='molecule_id', if_exists='append')

            print "Batch No. {} completed".format(count)
            count += 1

        con.close()

        # Delete the first row with duplicate column names
        con = sqlite3.connect(self.folder+self.file_out)
        c = con.cursor()
        c.execute("DELETE FROM train_data WHERE TRIP_ID='TRIP_ID'")
        con.commit()
        con.close()

        print "All completed!\n"
Example #10
    def test_read_table_columns(self):
        # test columns argument in read_table
        sql.to_sql(self.test_frame1, 'test_frame', self.conn)

        cols = ['A', 'B']
        result = sql.read_table('test_frame', self.conn, columns=cols)
        self.assertEqual(result.columns.tolist(), cols,
                         "Columns not correctly selected")
Example #11
    def test_to_sql_fail(self):
        sql.to_sql(self.test_frame1, 'test_frame2',
                   self.conn, flavor='sqlite', if_exists='fail')
        self.assertTrue(
            sql.has_table('test_frame2', self.conn, flavor='sqlite'), 'Table not written to DB')

        self.assertRaises(ValueError, sql.to_sql, self.test_frame1,
                          'test_frame2', self.conn, flavor='sqlite', if_exists='fail')
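The test relies on if_exists='fail' raising ValueError once the table exists. A minimal sketch of the three if_exists modes with the modern API, assuming an in-memory SQLite connection:

import sqlite3
import pandas as pd

conn = sqlite3.connect(':memory:')
df = pd.DataFrame({'a': [1, 2]})

df.to_sql('t', conn, index=False)                       # creates the table
df.to_sql('t', conn, index=False, if_exists='append')   # adds rows
df.to_sql('t', conn, index=False, if_exists='replace')  # drops and recreates
try:
    df.to_sql('t', conn, index=False, if_exists='fail')
except ValueError:
    pass  # table already exists, so 'fail' raises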
Example #12
    def getData(self):
        # Get crime records via API
        if self.storage == "remote":
            print("Getting crime data from remote source.\nThis takes a while (approx. 5 mins)! Please be patient.")

            # API request information
            client_crime = Socrata('data.lacity.org','7pTgt6f2oTY53aDI1jXNJoNZD')
            offset_temp = 0
            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            cur.execute("DROP TABLE IF EXISTS 'Crime2016'")

            # Getting data in dataframe then manipulate before storing in ReferralCrimeMap.db
            while True:
                results = client_crime.get("7fvc-faax", limit=10000, offset=offset_temp)
                crime_df_temp = pd.DataFrame.from_records(results)
                # This loop stops when the next block of dataframe is empty
                if crime_df_temp.empty:
                    break

                # Split location_1 into lat and long
                # Create 'year_rptd' to filter cases reported in 2016
                # Create 'count' for later data analysis
                crime_df_temp['location_1'] = crime_df_temp['location_1'].astype('str')
                crime_df_temp['long'] = crime_df_temp['location_1'].map(lambda x: x.split(']')[0].split('[')[-1].split(',')[0])
                crime_df_temp['lat'] = crime_df_temp['location_1'].map(lambda x: x.split(']')[0].split('[')[-1].split(',')[-1])
                crime_df_temp['year_rptd'] = crime_df_temp['date_rptd'].map(lambda x: x.split('-')[0])
                crime_df_temp['month_rptd'] = crime_df_temp['date_rptd'].map(lambda x: x.split('-')[1])
                crime_df_temp['count'] = 1
                crime_df_temp = crime_df_temp[crime_df_temp['year_rptd']=='2016']

                # Insert dataframe into ReferralCrimeMap.db
                pd_sql.to_sql(crime_df_temp, 'Crime2016', conn, if_exists='append', index=False)
                offset_temp+=10000

                # Shows the percentage of data 
                if offset_temp % 100000 == 0:
                    print(offset_temp/2000000*100,"%")
                else:
                    continue
            cur.execute("SELECT * FROM Crime2016")
            print(cur.fetchone())
            conn.close()

        # Load local data if -source is set to local
        else:
            print("Getting crime data from local source.")
            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            query = "SELECT * FROM Crime"
            try:
                crime = pd.read_sql(query, conn)
                conn.close()
                print(crime.head())
            except Exception as e:
                print("There is an error:", e)
                print("Please set data course as remote.")
                exit()
Example #13
def test():
    v='sh600216'
#     print str(count)+'/'+str(total),'start',v
#         try:
    df=oneStock(v,starttime=datetime(2016,1,1),endtime=datetime(2016,8,30))
    [conn,cur]=getCursor()
    pdsql.to_sql(df, v, conn, flavor='mysql')
    setPrimaryKey(v)
    return
Example #14
    def transform(self):

        # this function does the following
        # 1) convert timestamp to time of the day, day of week, and month of year
        # 2) convert polyline to snapshots
        # 3) calculate the trip length
        # 4) calculate the average speed

        print "Generating training file with descriptive stats:"
        # initialize the connection with the input and output sqlite file
        con_in = sqlite3.connect(self.folder+self.file_in)
        con_out = sqlite3.connect(self.folder+self.file_out)

        con_in.text_factory = str
        chunk_reader = pd.read_sql("SELECT * FROM train_data WHERE MISSING_DATA!='1'",
                                   con_in, chunksize=5000)
        count = 1

        for chunk in chunk_reader:

            print 'Chunk {} started:'.format(count)
            chunk['Time_of_Day'] = chunk.TIMESTAMP.map(lambda x: datetime.utcfromtimestamp(float(x)).hour +
                                                                 datetime.utcfromtimestamp(float(x)).minute/60.0 +
                                                                 datetime.utcfromtimestamp(float(x)).second/3600.0)
            chunk['Hour_of_Day'] = chunk.Time_of_Day.map(lambda x: np.round(x))
            chunk['Day_of_Week'] = chunk.TIMESTAMP.map(lambda x: datetime.utcfromtimestamp(float(x)).weekday())
            chunk['Month_of_Year'] = chunk.TIMESTAMP.map(lambda x: datetime.utcfromtimestamp(float(x)).month)

            chunk['POLYLINE_Split'] = chunk.POLYLINE.map(lambda x:
                                                         re.compile(r"\[[-+]?\d+\.\d+,[-+]?\d+\.\d+\]").findall(x))
            chunk['Snapshots'] = chunk.POLYLINE_Split.map(lambda x: len(x))

            chunk = chunk[chunk.Snapshots > 10]
            chunk['Start_Point'] = chunk.POLYLINE_Split.map(lambda x: lonlat_convert(x[0]))
            chunk['End_Point'] = chunk.POLYLINE_Split.map(lambda x: lonlat_convert(x[-1]))

            chunk['Trip_Length'] = pd.DataFrame([get_dist(chunk.iloc[i].Start_Point, chunk.iloc[i].End_Point)
                                                 for i in range(len(chunk))])

            chunk['Avg_Speed'] = chunk['Trip_Length']*1000.0 / ((chunk['Snapshots']-1)*15)

            chunk.drop(['POLYLINE', 'POLYLINE_Split', 'Start_Point', 'End_Point'], axis=1, inplace=True)

            sql.to_sql(chunk,
                       name='train_data',
                       con=con_out,
                       index=False,
                       index_label='molecule_id',
                       if_exists='append')

            print 'Chunk {} completed!'.format(count)
            count += 1

        con_in.close()
        con_out.close()

        print "All completed!\n"
Example #15
def load_rates():
    '''
    Puts all the DAWF rates into a sql table
    '''
    # has_table returns a bool and never raises ValueError,
    # so the original try/except could never reach to_sql
    if sql.has_table('DAWF', con=cnx):
        print "Injury rate data already loaded."
    else:
        sql.to_sql(DAWF_rate, name='DAWF', con=cnx)
        print "Injury rate data successfully loaded."
Example #16
def load_aos():
    '''
    Puts all the Area Offices into a sql table
    '''
    if sql.has_table('AOS', con=cnx):
        print "Area office data already loaded."
    else:
        sql.to_sql(aos, name='AOS', con=cnx)
        print "Area office data successfully loaded."
Example #17
def load_emp():
    '''
    Puts all of the employment data into a sql table
    '''
    if sql.has_table('allemp', con=cnx):
        print "Employment data already loaded."
    else:
        sql.to_sql(view_emp, name='allemp', con=cnx)
        print "Employment data successfully loaded."
Example #18
def _write_table(tablename, df, conn):
    "writes a dataframe to the sqlite database"

    for col in df.columns:
        if re.search("[()]", col):
            msg = "please follow SQLite column naming conventions: "
            msg += "http://www.sqlite.org/lang_keywords.html"
            raise Exception(msg)

    to_sql(df, name=tablename, con=conn, flavor='sqlite')
Example #19
    def _write_table(self, tablename, df):
        "writes a dataframe to the sqlite database"

        for col in df.columns:
            if re.search("[()]", col):
                msg = "please follow SQLite column naming conventions: "
                msg += "http://www.sqlite.org/lang_keywords.html"
                raise Exception(msg)

        dtype = {k: str(v) for k, v in df.dtypes.items()}
        to_sql(df, name=tablename, con=self.conn, flavor="sqlite", dtype=dtype)
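Passing dtype= pins the SQL column types instead of leaving them to inference; with a plain DBAPI connection the values can be type strings, as in the example above. A short sketch, assuming SQLite:

import sqlite3
import pandas as pd

conn = sqlite3.connect(':memory:')
df = pd.DataFrame({'name': ['a', 'b'], 'score': [1.5, 2.5]})
# explicit SQL type strings for each column
df.to_sql('scores', conn, index=False, dtype={'name': 'TEXT', 'score': 'REAL'})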
Example #20
    def _write_table(self, tablename, df, conn):
        "writes a dataframe to the sqlite database"
        for col in df.columns:
            if re.search("[()]", col):
                msg = "please follow SQLite column naming conventions: "
                msg += "http://www.sqlite.org/lang_keywords.html"
                raise Exception(msg)
        if self.do_log:
            self.log.append("(pandas) create table  %s  ..." % tablename)
            cards = ','.join(['?'] * len(df.columns))
            self.log.append("(pandas) INSERT INTO  %s  VALUES (%s)"
                            % (tablename, cards))
        to_sql(df, name=tablename, con=self.conn, flavor=self.engine)
Example #21
    def test_to_sql_replace(self):
        sql.to_sql(self.test_frame1, 'test_frame3',
                   self.conn, flavor='sqlite', if_exists='fail')
        # Add to table again
        sql.to_sql(self.test_frame1, 'test_frame3',
                   self.conn, flavor='sqlite', if_exists='replace')
        self.assertTrue(
            sql.has_table('test_frame3', self.conn, flavor='sqlite'), 'Table not written to DB')

        num_entries = len(self.test_frame1)
        num_rows = self._count_rows('test_frame3')

        self.assertEqual(
            num_rows, num_entries, "not the same number of rows as entries")
Example #22
def test(now):
    import datetime
    from sql import getCursor, insertStock
    from stock import oneStock, getStockList
    import pandas.io.sql as pdsql
    before = now - datetime.timedelta(700)
    v = 'sh600216'
    df = oneStock(v, starttime=before, endtime=now)
    df = df.iloc[[-1]]
    [conn, cur] = getCursor()
    pdsql.to_sql(df, v, conn, if_exists='append', index_label='date')
    return
Example #23
    def test_to_sql_index_label(self):
        temp_frame = DataFrame({'col1': range(4)})
        
        # no index name, defaults to 'pandas_index'
        sql.to_sql(temp_frame, 'test_index_label', self.conn)
        frame = sql.read_table('test_index_label', self.conn)
        self.assertEqual(frame.columns[0], 'pandas_index')

        # specifying index_label
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace', index_label='other_label')
        frame = sql.read_table('test_index_label', self.conn)
        self.assertEqual(frame.columns[0], 'other_label',
                         "Specified index_label not written to database")

        # using the index name
        temp_frame.index.name = 'index'
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace')
        frame = sql.read_table('test_index_label', self.conn)
        self.assertEqual(frame.columns[0], 'index',
                         "Index name not written to database")

        # has index name, but specifying index_label
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace', index_label='other_label')
        frame = sql.read_table('test_index_label', self.conn)
        self.assertEqual(frame.columns[0], 'other_label',
                         "Specified index_label not written to database")
Example #24
    def test_roundtrip(self):
        sql.to_sql(self.test_frame1, 'test_frame_roundtrip',
                   con=self.conn, flavor='sqlite')
        result = sql.read_sql(
            'SELECT * FROM test_frame_roundtrip',
            con=self.conn,
            flavor='sqlite')

        # HACK!
        result.index = self.test_frame1.index
        result.set_index('pandas_index', inplace=True)
        result.index.astype(int)
        result.index.name = None
        tm.assert_frame_equal(result, self.test_frame1)
Example #25
    def getData(self):
        if self.storage == "remote":
            print("Getting walk score data from remote source.\nThis takes a while! Please be patient.")

            # Get walk score
            walk_score = list()
            for i in self.lacity_zip:
                zipcode = i[1]
                zip_ID = i[0]
                url = "https://www.walkscore.com/CA/Los_Angeles/"+str(zipcode)
                response = requests.get(url)
                soup = BeautifulSoup(response.content, 'lxml')
                score = soup.find_all('div',{"style":"padding: 0; margin: 0; border: 0; outline: 0; position: absolute; top: 0; bottom: 0; left: 0; right: 0;"})
                walkScoreTag = score[0].find('img')
                walkScore = walkScoreTag.attrs['src'].split('/')[-1].split('.')[0]
                walk_score.append((zipcode,walkScore,zip_ID))

            # Convert walk score list into dataframe
            print("Convert walk score into dataframe")
            walk_score_df = pd.DataFrame(walk_score)
            walk_score_df.columns=['zip_code','walk_score','zip_ID']
            print("walk score dataframe")
            print(walk_score_df.head())

            # Insert walk score dataframe into ReferralCrimeMap.db
            print("Inserting walk score dataframe into ReferralCrimeMap.db")
            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            pd_sql.to_sql(walk_score_df, 'WalkScore', conn, if_exists='replace', index=False)
            cur.fetchmany(5)
            conn.close()

        else:
            print("Getting walk score from local source.")

            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            query = '''
                    SELECT *
                    FROM WalkScore
                    '''
            try:
                walkScore = pd.read_sql(query, conn)
                conn.close()
                print(walkScore.head())
            except Exception as e:
                print("There is an error:", e)
                print("Please enter correct data source.")
                exit()
Example #26
    def save(self, filename=None):
        """
        Save the point histories to sqlite3 database.  
        Save the device object properties to a pickle file so the device can be reloaded.
        """
        if filename:
            if ".db" in filename:
                filename = filename.split(".")[0]
            self.properties.db_name = filename
        else:
            self.properties.db_name = "{}".format(self.properties.name)

        # Does file exist? If so, append data
        if os.path.isfile("{}.db".format(self.properties.db_name)):
            his = self._read_from_sql(
                'select * from "{}"'.format("history"), self.properties.db_name
            )
            his.index = his["index"].apply(Timestamp)
            try:
                last = his.index[-1]
                df_to_backup = self.backup_histories_df()[last:]
            except IndexError:
                df_to_backup = self.backup_histories_df()

        else:
            self._log.debug("Creating a new backup database")
            df_to_backup = self.backup_histories_df()

        # DataFrames that will be saved to SQL
        with contextlib.closing(
            sqlite3.connect("{}.db".format(self.properties.db_name))
        ) as con:
            sql.to_sql(
                df_to_backup,
                name="history",
                con=con,
                index_label="index",
                index=True,
                if_exists="append",
            )

        # Saving other properties to a pickle file...
        prop_backup = {}
        prop_backup["device"] = self.dev_properties_df()
        prop_backup["points"] = self.points_properties_df()
        with open("{}.bin".format(self.properties.db_name), "wb") as file:
            pickle.dump(prop_backup, file)

        self._log.info("Device saved to {}.db".format(self.properties.db_name))
Example #27
def oneIndustry(industry, i_c):
    stocks = pd.DataFrame()  # renamed from 'set' to avoid shadowing the builtin
    for id in i_c.index:
        te = __company_info__.query('stock_id==' + str(id))
        stocks = stocks.append(te)
    pure = int(stocks['outstanding'].sum() * 10000)
    total = int((stocks['outstanding'] * stocks['bvps']).sum() * 10000)
    stocks['percent'] = (stocks['outstanding'] * stocks['bvps'] * 10000) / total
    count = len(stocks.index)
    # get src
    starttime = datetime.datetime(2006, 1, 1)
    endtime = datetime.datetime.now()
    use = []
    for id in i_c.index:
        te = getSingleStock(transTable(id), starttime, endtime)
        updown(te)
        te.reset_index(inplace=True)
        te['stock_id'] = id
        resu = pd.merge(left=te[['stock_id', 'open', 'high', 'close', 'low', 'volume', 'updown', 'date']],
                        right=stocks[['stock_id', 'stock_name', 'percent']], on='stock_id')
        resu['open'] = resu['open'] * resu['percent']
        resu['realclose'] = resu['close']
        resu['close'] = resu['close'] * resu['percent']
        resu['high'] = resu['high'] * resu['percent']
        resu['low'] = resu['low'] * resu['percent']
        resu['volume'] = resu['volume'] * resu['percent']
        use.append(resu)
    data = use[0]
    for i in range(1, len(use)):
        data = data.append(use[i])
    data.fillna(0, inplace=True)
    df1 = data[['open', 'high', 'low', 'close', 'volume']].groupby(data['date']).sum()
    df1.reset_index(level=0, inplace=True)
    df2 = data[['stock_name', 'updown', 'stock_id', 'date', 'realclose']].groupby(data['date'], as_index=False).apply(apply_lambda)
    df = pd.merge(left=df1, right=df2, on='date')
    df['industry_name'] = industry
    df['pure'] = pure
    df['total'] = total
    df['companySum'] = count
    df = df.rename(columns={'volume': 'volumn', 'stock_name': 'leaderName', 'stock_id': 'leader',
                            'realclose': 'leaderPrice', 'updown': 'leaderUpdown'})
    df.set_index('date', drop=True, inplace=True)

    eng = getE()
    df = df.replace({float('inf'): 0})  # replace returns a new frame; the result was previously discarded
    pdsql.to_sql(df, unicode(industry), eng, if_exists='replace', chunksize=200)
    setPrimaryKey(industry)
    return
Example #28
    def test_roundtrip(self):
        # this test otherwise fails, Legacy mode still uses 'pandas_index'
        # as default index column label
        sql.to_sql(self.test_frame1, 'test_frame_roundtrip',
                   con=self.conn, flavor='sqlite')
        result = sql.read_sql(
            'SELECT * FROM test_frame_roundtrip',
            con=self.conn,
            flavor='sqlite')

        # HACK!
        result.index = self.test_frame1.index
        result.set_index('pandas_index', inplace=True)
        result.index.astype(int)
        result.index.name = None
        tm.assert_frame_equal(result, self.test_frame1)
Example #29
    def getLAcityZip(self):
        if self.storage == "remote":
            print("Getting LA City zip code from remote source.")

            # Get all zip codes in Los Angeles City (http://geohub.lacity.org/datasets/875d540d71e64b8696cc368865c2b640_0)
            url="https://opendata.arcgis.com/datasets/875d540d71e64b8696cc368865c2b640_0.geojson"
            r = requests.get(url)
            data = json.loads(r.text)
            lacity_zip = list()

            for i in range(len(data['features'])):
                try:
                    zipcode = data['features'][i]['properties']['ZIPCODE']
                    lacity_zip.append((i+1,zipcode))
                except Exception as e:
                    print(e)
                    continue
            self.lacity_zip = lacity_zip
            print("Total number of LA City zip codes:", len(lacity_zip))

            # Convert the list into dataframe
            lacity_zip_df = pd.DataFrame(lacity_zip, columns=['prime_key', 'zipcode'])

            #Insert the data frame into ReferralCrimeMap.db
            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            pd_sql.to_sql(lacity_zip_df, 'LAzip', conn, if_exists='replace', index=False)
            cur.execute('SELECT * FROM LAzip')
            print("Inserting LA zip codes into ReferralCrimeMap.db.")
            print(cur.fetchmany(5))
            conn.close()
        else:
            print("Getting LA City zip code from local source.")
            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            query = '''
                    SELECT *
                    FROM LAzip
                    '''
            try:
                lacity_zip = pd.read_sql(query, conn)
                conn.close()
                print(lacity_zip.head())
            except Exception as e:
                print("There is an error:" ,e)
                print("Please enter correct data source.")
                exit()
Example #30
def to_sql(df, table_name, if_exists='append', null=mdb.NULL, db=DB):

    # replace NaNs
    if 'timestamp' not in df.columns:
        df.fillna(null, inplace=True)

    # open connection to db
    db = mdb.connect(user="******", host="localhost", port=3306, db=db,
                     charset='utf8')
    # write to db
    try:
        sql.to_sql(df, table_name, db, 'mysql', if_exists)
    except AttributeError:
        sql.write_frame(df, table_name, db, 'mysql', if_exists)

    # close connection to db
    db.close()
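Both flavor='mysql' and the write_frame fallback are long gone from pandas; on current versions the equivalent is an SQLAlchemy engine. A sketch of the same write (driver, connection URL, and table name are placeholders):

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('mysql+mysqldb://user:password@localhost/dbname')  # placeholder URL
df = pd.DataFrame({'a': [1, 2]})
df.to_sql('table_name', engine, if_exists='append', index=False)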
Example #31
    def getData(self):
        if self.storage == "remote":
            print(
                " * Getting walk score data from remote source.\nThis takes a few minutes! Thanks for being patient."
            )

            # Get walk score
            walk_score = list()
            for i in self.lacity_zip:
                zipcode = i[1]
                zip_ID = i[0]
                url = "https://www.walkscore.com/CA/Los_Angeles/" + str(
                    zipcode)
                response = requests.get(url)
                soup = BeautifulSoup(response.content, 'lxml')
                score = soup.find_all(
                    'div', {
                        "style":
                        "padding: 0; margin: 0; border: 0; outline: 0; position: absolute; top: 0; bottom: 0; left: 0; right: 0;"
                    })
                walkScoreTag = score[0].find('img')
                walkScore = walkScoreTag.attrs['src'].split('/')[-1].split(
                    '.')[0]
                walk_score.append((zipcode, walkScore, zip_ID))

            # Convert walk score list into dataframe
            print("Convert walk score into dataframe")
            walk_score_df = pd.DataFrame(walk_score)
            walk_score_df.columns = ['zip_code', 'walk_score', 'zip_ID']
            print("walk score dataframe")
            print(walk_score_df.head())

            # Insert walk score dataframe into ReferralCrimeMap.db
            print("Inserting walk score dataframe into ReferralCrimeMap.db")
            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            pd_sql.to_sql(walk_score_df,
                          'WalkScore',
                          conn,
                          if_exists='replace',
                          index=False)
            cur.fetchmany(5)
            conn.close()

        else:
            print("Getting walk score from local source.")

            conn = sq.connect("ReferralCrimeMap.db")
            cur = conn.cursor()
            query = '''
                    SELECT *
                    FROM WalkScore
                    '''
            try:
                walkScore = pd.read_sql(query, conn)
                conn.close()
                print(walkScore.head())
            except Exception as e:
                print("There is an error:", e)
                print("Please enter correct data source.")
                exit()
Example #32
def write_mun_db(conn, CSV_MUNICIPIOS):
    print("Cleaning municipio data")
    crime_municipios = CrimeMunicipios(os.path.join('snsp-data', CSV_MUNICIPIOS))
    print("Writing metada to db")
    # has to be append to preserve the foreign keys
    pd_sql.to_sql(crime_municipios.tipo, 'tipo_municipios', conn, if_exists='append', index=False)
    pd_sql.to_sql(crime_municipios.subtipo, 'subtipo_municipios', conn, if_exists='append', index=False)
    pd_sql.to_sql(crime_municipios.modalidad, 'modalidad_municipios', conn, if_exists='append', index=False)
    pd_sql.to_sql(crime_municipios.bien_juridico, 'bien_juridico_municipios', conn, if_exists='append', index=False)
    pd_sql.to_sql(crime_municipios.municipios, 'municipio_names', conn, if_exists='append', index=False)
    pd_sql.to_sql(crime_municipios.population, 'population_municipios', conn, if_exists='append', index=False, chunksize=20000)
    print("Writing municipio data to db")
    # Temporary fix because the SNSP duplicated (with different values) the data for 2055 TRINIDAD ZAACHILA
    #crime_municipios.data = crime_municipios.data[(crime_municipios.data['state_code'] != 22) & (crime_municipios.data['mun_code'] != 555)]
    pd_sql.to_sql(crime_municipios.data, 'municipios_fuero_comun', conn, if_exists='append', index=False, chunksize=1000000)
    print("End writing municipio data to db")
Example #33
File: sql.py Project: cccoast/crazy
engine = create_engine(r'mysql+mysqldb://root:123@localhost/employees')
sqldf = psql.read_sql(query1, engine)
print sqldf
engine = create_engine(r'mysql+mysqldb://root:123@localhost/employees')
sqldf = psql.read_sql(query3, engine, chunksize=10)
for i, idf in enumerate(sqldf):
    print i, idf
    if i == 3:
        break

engine = create_engine(r'mysql+mysqldb://root:123@localhost/employees')
df = pd.DataFrame(np.random.random(100).reshape(10, 10),
                  index=range(10),
                  columns=range(10))
print df
psql.to_sql(df, 'test_table', engine)
engine = create_engine(r'mysql+mysqldb://root:123@localhost/employees')
df = pd.DataFrame({'dept_no': 'd013', 'dept_name': 'Cat'}, index=range(1))
print df
df.to_sql('departments', engine, if_exists='append', index=False)
'''META DATA'''
engine = create_engine(r'mysql+mysqldb://root:123@localhost/employees')
metadata = MetaData(engine)
my_table = Table('cat', metadata, Column('age', Integer),
                 Column('name', String(40)), Column('color', String(40)))
my_table.create()
insert = my_table.insert()
insert.execute(age=1, name='mimi', color='yellow')
'''ORM'''
Base = declarative_base()
Example #34
def write_state_db(conn, CSV_ESTADOS):
    crime_states = CrimeStates(os.path.join('snsp-data', CSV_ESTADOS))
    # has to be append to preserve the foreign keys
    pd_sql.to_sql(crime_states.tipo, 'tipo_states', conn, if_exists='append', index=False)
    pd_sql.to_sql(crime_states.subtipo, 'subtipo_states', conn, if_exists='append', index=False)
    pd_sql.to_sql(crime_states.modalidad, 'modalidad_states', conn, if_exists='append', index=False)
    # pd_sql.to_sql(crime_states.state_codes, 'state_names', conn, if_exists='append', index=False)
    pd_sql.to_sql(crime_states.bien_juridico, 'bien_juridico_states', conn, if_exists='append', index=False)
    pd_sql.to_sql(crime_states.population, 'population_states', conn, if_exists='append', index=False, chunksize=20000)
    pd_sql.to_sql(crime_states.data, 'estados_fuero_comun', conn, if_exists='append', index=False, chunksize=20000)