Example #1
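# asv-style benchmark: times per-dtype to_sql writes and read_sql_query reads against a SQLAlchemy engine vs. a raw sqlite3 connection.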
class WriteSQLDtypes(object):

    goal_time = 0.2
    params = (['sqlalchemy', 'sqlite'],
              ['float', 'float_with_nan', 'string', 'bool', 'int', 'datetime'])
    param_names = ['connection', 'dtype']

    def setup(self, connection, dtype):
        N = 10000
        con = {'sqlalchemy': create_engine('sqlite:///:memory:'),
               'sqlite': sqlite3.connect(':memory:')}
        self.table_name = 'test_type'
        self.query_col = 'SELECT {} FROM {}'.format(dtype, self.table_name)
        self.con = con[connection]
        self.df = DataFrame({'float': np.random.randn(N),
                             'float_with_nan': np.random.randn(N),
                             'string': ['foo'] * N,
                             'bool': [True] * N,
                             'int': np.random.randint(0, N, size=N),
                             'datetime': date_range('2000-01-01',
                                                    periods=N,
                                                    freq='s')},
                            index=tm.makeStringIndex(N))
        self.df.loc[1000:3000, 'float_with_nan'] = np.nan
        self.df['datetime_string'] = self.df['datetime'].astype(str)
        self.df.to_sql(self.table_name, self.con, if_exists='replace')

    def time_to_sql_dataframe_column(self, connection, dtype):
        self.df[[dtype]].to_sql('test1', self.con, if_exists='replace')

    def time_read_sql_query_select_column(self, connection, dtype):
        read_sql_query(self.query_col, self.con)
Example #2
def get_link(url):
	link_exr = re.compile(r'<a.*?\s*href=\"(.*?)\".*?>(.*?)</a>')
	links = []

	# open web content
	f = urllib.request.urlopen(url)
	content = f.read().decode('utf-8', errors='ignore')

	# HTML-tag version: find all urls and save to links
	# soup = BeautifulSoup(content, "lxml")
	# for a in soup.find_all('a', href=True):
	# 	if "detik.com" in a['href']:
	# 		if "http:" not in a['href']:
	# 			a['href'] = "http:" + a['href']
	# 		print("Found the URL:", a['href'])
	# 		links.append(a['href'])

	# regex version: find all urls and save to links
	for link in link_exr.findall(content):
		if "detik.com" in link[0]:
			link_detik = link[0]
			if "http:" not in link_detik:
				link_detik = "http:" + link_detik
			links.append(link_detik)

	# save to DataFrame
	df = DataFrame(links, columns=['detik url'])
	df = df.drop_duplicates()

	print(df.head(0))

	# create engine and save to MySQL database
	detik_db = create_engine("mysql://root:root@localhost/data_detik")
	df.to_sql('url_detik', detik_db, if_exists='replace')
Example #3
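# asv-style benchmark: times full-table read_sql_table reads and datetime parsing of a string column.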
class ReadSQLTable(object):

    goal_time = 0.2

    def setup(self):
        N = 10000
        self.table_name = 'test'
        self.con = create_engine('sqlite:///:memory:')
        self.df = DataFrame({'float': np.random.randn(N),
                             'float_with_nan': np.random.randn(N),
                             'string': ['foo'] * N,
                             'bool': [True] * N,
                             'int': np.random.randint(0, N, size=N),
                             'datetime': date_range('2000-01-01',
                                                    periods=N,
                                                    freq='s')},
                            index=tm.makeStringIndex(N))
        self.df.loc[1000:3000, 'float_with_nan'] = np.nan
        self.df['datetime_string'] = self.df['datetime'].astype(str)
        self.df.to_sql(self.table_name, self.con, if_exists='replace')

    def time_read_sql_table_all(self):
        read_sql_table(self.table_name, self.con)

    def time_read_sql_table_parse_dates(self):
        read_sql_table(self.table_name, self.con, columns=['datetime_string'],
                       parse_dates=['datetime_string'])
Example #4
    def to_mysql(self):

        try:

            df = DataFrame({'user_id':[self.user_id],
                            'user_name':[self.user_name],
                            'title':[self.title],
                            'detail': [self.detail],
                            'publish_time':[self.publish_time],
                            'device':[self.device],
                            'href':[self.href],
                            'repost_count':[self.repost_count],
                            'donate_count':[self.donate_count],
                            'comment_count':[self.comment_count]
                            },
                           columns=['user_id', 'user_name', 'title', 'detail',
                                    'publish_time', 'repost_count', 'donate_count',
                                    'comment_count', 'device', 'href'])
            print(df)

            try:
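                # delete any earlier copy of this article (same user, detail, publish time) before appending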
                sql_del = "delete from {table} where user_id='{user_id}' and detail='{detail}' and publish_time='{publish_time}'".format(
                        table = mysql_table_xueqiu_article,
                        user_id = self.user_id,
                        detail = self.detail,
                        publish_time = self.publish_time
                        )
                engine.execute(sql_del)
            except Exception as e:
                print('delete error! ', str(e))

            df.to_sql(mysql_table_xueqiu_article, engine, if_exists='append', index=False)
            return True
        except Exception as e:
            print(e)
            return False
Example #5
    def to_mysql(self):

        try:

            df = DataFrame({'user_id':[self.user_id],
                            'name':[self.name],
                            'sex': [self.sex],
                            'area':[self.area],
                            'stock_count':[self.stock_count],
                            'talk_count':[self.talk_count],
                            'fans_count':[self.fans_count],
                            'big_v_in_fans_count':0,
                            'follows_count':0,
                            'capacitys':[self.capacitys],
                            'summary':[self.summary],
                            'follow_search_time':'',
                            'update_time':[self.update_time]
                            },
                           columns=['user_id', 'name', 'sex', 'area', 'stock_count', 'talk_count',
                                    'fans_count', 'big_v_in_fans_count', 'follows_count', 'capacitys',
                                    'summary', 'follow_search_time', 'update_time'])
            print(df)
            df.to_sql(big_v_table_mysql, engine, if_exists='append', index=False)
            return True
        except Exception as e:
            print(e)
            return False
Example #6
    def get_publish_articles(self):

        t1 = time.time()
        print('begin query...')
        #sql = 'select distinct user_id from %s where user_id not in (select distinct user_id from %s)' % (big_v_table_mysql, archive_table_mysql)
        #df = pd.read_sql_query(sql, engine)
        #user_ids = df['user_id'].get_values()
        sql1 = 'select distinct user_id from %s where fans_count > 1000 and fans_count < 10001 ' % (big_v_table_mysql)
        sql2 = 'select distinct user_id from %s' % archive_table_mysql
        df1 = pd.read_sql_query(sql1, engine)
        df2 = pd.read_sql_query(sql2, engine)
        user_ids1 = df1['user_id'].values
        user_ids2 = df2['user_id'].values
        user_ids = list(set(user_ids1).difference(user_ids2))
        t2 = time.time()
        print('query mysql by join cost:', t2 - t1, 's')

        for user_id in user_ids:
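            # record failed user ids with a timestamp and reason in a separate table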
            try:
                self.get_publish_articles_by_id(user_id)
            except Exception as e:
                se = Series([user_id, GetNowTime(), str(e)], index=['user_id', 'fail_time', 'fail_reason'])
                df = DataFrame(se).T
                df.to_sql(unfinish_arcticle_table_mysql, engine, if_exists='append', index=False)
                print(e)
Example #7
def insert_co_exp_ids(profile_ids, modalities, db_con_1, db_con_2):
    """
    Scan high frequency type modalities
    and extract mutually existent exp ids.
    Save it to CSV file.
    """
    print('\twriting co_exp_ids for sensor data')
    for profile_id in profile_ids:
        high_interval_mods = filter(lambda x: info.MOD_FREQ_TYPE[x] == info.FREQ_HIGH, modalities)
        co_exp_ids = []
        for mod in high_interval_mods:
            exp_ids = loader.load_exp_ids(profile_id, mod, filtered=False, server_index=1, db_con=db_con_1, close=False)
            if len(exp_ids) > 0:
                co_exp_ids.append(pd.DataFrame([0] * len(exp_ids), index=exp_ids, columns=[mod]))
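        # concat aligns the per-modality frames on exp id; dropna keeps only ids present in every modality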
        co_exp_ids = pd.concat(co_exp_ids, axis=1)
        co_exp_ids = co_exp_ids.dropna()
        co_exp_ids = list(co_exp_ids.index)
        co_exp_ids.sort()

        done_ids = loader.load_co_exp_ids(profile_id, db_con=db_con_2, close=False)
        co_exp_ids = [x for x in co_exp_ids if x not in done_ids]
        if len(co_exp_ids) == 0:
            print(profile_id, "all co_exp_ids are already inserted!")
            continue

        df = DataFrame(co_exp_ids, columns=['expId'])
        df['profile_id'] = profile_id
        df.to_sql("co_exp_ids", db_con_2, flavor='mysql', if_exists='append', index=False)
        print('\t\t%s number of exp ids of user %s are successfully inserted!' % (len(df), profile_id))
Example #8
    def send_to_db(self):
        # collect this object's attributes into a single-row frame and append it
        conn = sqlite3.connect('data2.sqlite', timeout=30)
        c = conn.cursor()
        df = DataFrame(list(self.__dict__.items()), index=self.__dict__.keys())
        df = df.drop(0, axis=1)
        df = df.transpose()
        df = df.sort_index(axis=1)
        df.to_sql('earnings_calendar', conn, if_exists='append', index=False)
Example #9
def save_to_database(df: pd.DataFrame, table_name: str):
    con = mysql.connector.connect(
            host='ec2-34-245-208-245.eu-west-1.compute.amazonaws.com',
            database='pregnaware',
            user='pregnaware',
            password=os.environ['DB_PREGNAWARE_PWD'])

    df.to_sql(con=con, name=table_name, flavor='mysql', if_exists='append', index=False)
Example #10
0
    def test_mixed_dtype_insert(self):
        # see GH6509
        s1 = Series(2**25 + 1, dtype=np.int32)
        s2 = Series(0.0, dtype=np.float32)
        df = DataFrame({'s1': s1, 's2': s2})

        # write and read again
        df.to_sql("test_read_write", self.conn, index=False)
        df2 = sql.read_sql_table("test_read_write", self.conn)

        tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True)
Example #11
    def _big_v_in_fans_to_sql(self, followList, id):

        try:
            df = DataFrame({'user_id': followList,  # users being followed
                            'fans_id': id           # follower
                            }, columns=['user_id', 'fans_id'])
            print(df[:10])
            df.to_sql(fans_in_big_v_table_mysql, engine, if_exists='append', index=False)

        except Exception as e:
            print(e)
Example #12
def insert_stat_types():
    mysql_con = mdb.connect(info.HOST_2, info.ID, info.PWD, info.DB_NAME_2)
    df = pd.read_sql("select * from info_statistics where statType = 'basic'", mysql_con, index_col='id')
    new_dict = {'statistics': [], 'statType': [], 'valueType': []}
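    # build 10 duration variants (cat0dur<Stat> .. cat9dur<Stat>) of every basic statistic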
    for i in range(10):
        for index, st_series in df.iterrows():
            new_dict['statistics'].append("cat%sdur%s" % (i, str(st_series['statistics'][0]).capitalize()
                                                          + st_series['statistics'][1:]))
            new_dict['statType'].append('valueDuration')
            new_dict['valueType'].append(st_series['valueType'])
    new_df = DataFrame(new_dict)
    new_df.to_sql("info_statistics", mysql_con, flavor='mysql', if_exists='append', index=False)
Example #13
    def to_mysql(self):

        try:

            df = DataFrame(
                {
                    "user_id": [self.user_id],
                    "user_name": [self.user_name],
                    "title": [self.title],
                    "detail": [self.detail],
                    "publish_time": [self.publish_time],
                    "href": [self.href],
                    "watch_count": [self.watch_count],
                    "repost_count": [self.repost_count],
                    "donate_count": [self.donate_count],
                    "comment_count": [self.comment_count],
                    # 'is_top': [self.is_top],
                    # 'is_repost': [self.is_repost],
                    # 'repost_reason': [self.repost_reason],
                    "device": [self.device],
                },
                columns=[
                    "user_id",
                    "user_name",
                    "title",
                    "detail",
                    "publish_time",
                    "href",
                    "watch_count",
                    "repost_count",
                    "donate_count",
                    "comment_count",
                    "device",
                ],
            )
            print(df)

            try:
                sql_del = "delete from {table} where user_id='{user_id}' and title='{title}' and publish_time='{publish_time}'".format(
                    table=mysql_table_licaishi_viewpoint,
                    user_id=self.user_id,
                    title=self.title,
                    publish_time=self.publish_time,
                )
                engine.execute(sql_del)
            except Exception as e:
                print("delete error! ", str(e))

            df.to_sql(mysql_table_licaishi_viewpoint, engine, if_exists="append", index=False)
            return True
        except Exception as e:
            print(e)
            return False
Example #14
def _downloadFqFactor(codes):
    factorDF = DataFrame()
    for code in codes:
        logging.info("Downloading %s fq factor."%code)
        df = ts.stock.trading._parase_fq_factor(code,'','')
        df.insert(0,"code",code,True)
        df = df.drop_duplicates('date').set_index('date')
        factorDF = pd.concat([factorDF, df])
        if conf.DEBUG:
            break

    logging.info("Deleting fq factor.")
    utils.executeSQL("delete from t_daily_fqFactor")
    logging.info("Saving fq factor.")
    factorDF.to_sql(name='t_daily_fqFactor', con=utils.getEngine(), if_exists="append", chunksize=20000)
    logging.info("Saved fq factor.")
Example #15
def test_aliased_views_with_computation():
    engine = sa.create_engine('sqlite:///:memory:')

    df_aaa = DataFrame({'x': [1, 2, 3, 2, 3],
                        'y': [2, 1, 2, 3, 1],
                        'z': [3, 3, 3, 1, 2]})
    df_bbb = DataFrame({'w': [1, 2, 3, 2, 3],
                        'x': [2, 1, 2, 3, 1],
                        'y': [3, 3, 3, 1, 2]})

    df_aaa.to_sql('aaa', engine)
    df_bbb.to_sql('bbb', engine)

    metadata = sa.MetaData(engine)
    metadata.reflect()

    sql_aaa = metadata.tables['aaa']
    sql_bbb = metadata.tables['bbb']

    L = Symbol('aaa', discover(df_aaa))
    R = Symbol('bbb', discover(df_bbb))

    expr = join(by(L.x, y_total=L.y.sum()),
                R)
    a = compute(expr, {L: df_aaa, R: df_bbb})
    b = compute(expr, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr2 = by(expr.w, count=expr.x.count(), total2=expr.y_total.sum())
    a = compute(expr2, {L: df_aaa, R: df_bbb})
    b = compute(expr2, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr3 = by(expr.x, count=expr.y_total.count())
    a = compute(expr3, {L: df_aaa, R: df_bbb})
    b = compute(expr3, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr4 = join(expr2, R)
    a = compute(expr4, {L: df_aaa, R: df_bbb})
    b = compute(expr4, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    """ # Takes a while
Example #16
def insert_data_types():
    mysql_con = mdb.connect(info.HOST_2, info.ID, info.PWD, info.DB_NAME_2)
    df = {'modality': [], 'field': [], 'variableType': [], 'permissionFree': [], 'category': [],
          'collectionFrequency': [], 'collectionDuration': [], 'sensitiveSensor': []}
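    # one row per (modality, field) pair, with flags derived from the info lookup tables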
    for mod in info.FREE_MODE_LIST:
        fields = info.MOD_FIELD_LIST[mod]
        for f in fields:
            df['modality'] += [mod]
            df['field'] += [f]
            df['variableType'] += [info.MOD_FIELD_TYPE[mod][f].split("_type")[0]]
            df['permissionFree'] += ['yes'] if f in info.PERMISSION_FREE[mod] else ['no']
            df['category'] += [info.MOD_CATEGORY[mod].split("Probe")[0]]
            df['collectionFrequency'] += [info.MOD_FREQ_TYPE[mod].split("_frequency")[0]]
            df['collectionDuration'] += [info.MOD_COL_TYPE[mod].split("_type")[0]]
            df['sensitiveSensor'] += ['yes'] if mod in info.SENSITIVE_MOD else ['no']

    df = DataFrame(df, columns=['modality', 'field', 'variableType', 'permissionFree', 'category',
                                'collectionFrequency', 'collectionDuration', 'sensitiveSensor'])
    df.to_sql("dataTypes", mysql_con, flavor='mysql', if_exists='append', index=False)
Example #17
    def _write(self, tablename, expected_dtypes, frame):
        if frame is None or frame.empty:
            # keeping the dtypes correct for empty frames is not easy
            frame = DataFrame(
                np.array([], dtype=list(expected_dtypes.items())),
            )
        else:
            # the 'shares' table is exempt from the column check
            if tablename != 'shares':
                if frozenset(frame.columns) != frozenset(expected_dtypes):
                    raise ValueError(
                        "Unexpected frame columns:\n"
                        "Expected Columns: %s\n"
                        "Received Columns: %s" % (
                            set(expected_dtypes),
                            frame.columns.tolist(),
                        )
                    )

            actual_dtypes = frame.dtypes
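            # every provided column must hold (a subtype of) its declared dtype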
            for colname, expected in iteritems(expected_dtypes):
                actual = actual_dtypes[colname]
                if not issubdtype(actual, expected):
                    raise TypeError(
                        "Expected data of type {expected} for column"
                        " '{colname}', but got '{actual}'.".format(
                            expected=expected,
                            colname=colname,
                            actual=actual,
                        ),
                    )

        frame.to_sql(
            tablename,
            self.conn,
            if_exists='append',
            chunksize=50000,
        )
Example #18
def insert_from_cp(con):
    instCpStockCode = win32com.client.Dispatch("CpUtil.CpStockCode")
    instStockMst = win32com.client.Dispatch("dscbo1.StockMst")
    instCpCodeMgr = win32com.client.Dispatch("CpUtil.CpCodeMgr")

    code_data = {'CODE':[],
                 'NAME':[],
                 'TYPE':[],
                 'PER':[],
                 'BPS':[], #PRICE / BPS
                 'INDUSTRY_CODE':[],
                 'INDUSTRY':[]}

    total = instCpStockCode.GetCount()
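    # one blocking CYBOS request per stock code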
    for i in range(0, total):
        code = instCpStockCode.GetData(CPSTOCKCODE_CODE, i)
        name = instCpStockCode.GetData(CPSTOCKCODE_NAME, i)
        code_data['CODE'].append(code)
        code_data['NAME'].append(name)
        
        instStockMst.SetInputValue(0, code)
        instStockMst.BlockRequest()
        type = instStockMst.GetHeaderValue(CPSTOCKMST_CATEGORY)
        per = instStockMst.GetHeaderValue(CPSTOCKMST_PER)
        bps = instStockMst.GetHeaderValue(CPSTOCKMST_BPS)
        industry_code = instStockMst.GetHeaderValue(CPSTOCKMST_INDUSTRY_CODE)
        industry_name = instCpCodeMgr.GetIndustryName(industry_code)

        code_data['TYPE'].append(type)
        code_data['PER'].append(per)
        code_data['BPS'].append(bps)
        code_data['INDUSTRY_CODE'].append(industry_code)
        code_data['INDUSTRY'].append(industry_name)

        get_logger().debug("{}/{} {} {} {} {} {}".format(i, total,code, name, type, per, bps))

    data = DataFrame(code_data)
    data.to_sql("CODE", con, if_exists='replace', chunksize=1000)
    get_logger().debug("{} 주식 종목 코드를 저장 하였습니다.".format(len(data)))
Example #19
def run():
    with sqlite3.connect("price.db") as con:
        cursor = con.cursor()

        def _make_long_date(date):
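            # CYBOS expects dates as YYYYMMDD integers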
            return date.year * 10000 + date.month * 100 + date.day

        for code, name in get_code_list():

            table_name = code
            
            recent_date = get_last_update_date(table_name)
            if recent_date is None: recent_date = START_DATE

            if recent_date.date() == datetime.now().date(): continue

            if recent_date.weekday() == 4 and datetime.now() - recent_date < timedelta(hours=64): continue

            start_date = (recent_date + timedelta(days=1))
            end_date = datetime.now()

            if datetime.now().hour < 16: end_date = datetime.now() - timedelta(days=1)

            if start_date > end_date: continue

            instStockChart = win32com.client.Dispatch("CpSysDib.StockChart")

            instStockChart.SetInputValue(CPSTOCKCHART_REQ_CODE, code)
            instStockChart.SetInputValue(CPSTOCKCHART_REQ_DATE_OR_COUNT, CPSTOCKCHART_REQ_PARAM_DATE)

            instStockChart.SetInputValue(CPSTOCKCHART_REQ_END_DATE,_make_long_date(end_date))
            instStockChart.SetInputValue(CPSTOCKCHART_REQ_START_DATE,_make_long_date(start_date))

            instStockChart.SetInputValue(CPSTOCKCHART_REQ_FIELD, 
                                            [CPSTOCKCHART_REQ_PARAM_FIELD_DATE,
                                            CPSTOCKCHART_REQ_PARAM_FIELD_OPEN,
                                            CPSTOCKCHART_REQ_PARAM_FIELD_HIGH,
                                            CPSTOCKCHART_REQ_PARAM_FIELD_LOW,
                                            CPSTOCKCHART_REQ_PARAM_FIELD_CLOSE,
                                            CPSTOCKCHART_REQ_PARAM_FIELD_VOLUME])
            instStockChart.SetInputValue(CPSTOCKCHART_REQ_TYPE, CPSTOCKCHART_REQ_TYPE_PARAM_DAY)
            instStockChart.SetInputValue(CPSTOCKCHART_REQ_ADJ, CPSTOCKCHART_REQ_ADJ_PARAM_ADJUST)

            instStockChart.BlockRequest()

            numData = instStockChart.GetHeaderValue(CPSTOCKCHART_RES_DATA_COUNT)
            price_data = {'DATE':[],
                          'OPEN':[],
                          'HIGH':[],
                          'LOW':[],
                          'CLOSE':[],
                          'VOLUME':[]}

            # CYBOS Plus returns the most recent data first
            for i in reversed(range(numData)):
                
                long_date = instStockChart.GetDataValue(0, i)
                year = int(long_date / 10000)
                month = int(long_date / 100) % 100
                day = long_date % 100
                dateval = datetime(year, month, day, 0, 0, 0)

                open = instStockChart.GetDataValue(1, i)
                high = instStockChart.GetDataValue(2, i)
                low = instStockChart.GetDataValue(3, i)
                close = instStockChart.GetDataValue(4, i)
                volume = instStockChart.GetDataValue(5, i)

                price_data['DATE'].append(dateval)
                price_data['OPEN'].append(open)
                price_data['HIGH'].append(high)
                price_data['LOW'].append(low)
                price_data['CLOSE'].append(close)
                price_data['VOLUME'].append(volume)

            price = DataFrame(price_data)
            price.to_sql(table_name, con, if_exists='append', chunksize=1000)
            get_logger().debug("{} 종목의 {}데이터를 저장 하였습니다.".format(code, len(price)))

            #remove old data
            row = cursor.execute("SELECT COUNT(*) FROM '{}'".format(table_name)).fetchone()
            if row[0] > 600:
                row = cursor.execute("DELETE FROM '{}' WHERE DATE = (SELECT MIN(DATE) FROM '{}')".format(table_name, table_name))
Example #20
if __name__ == '__main__':
    db_name = sys.argv[1]
    db_user = sys.argv[2]
    db_pass = sys.argv[3]

    # Load data for each page
    con = db.connect('localhost',db_user,db_pass,db_name)
    query = 'SELECT rev_user, rev_ip, page_id, page_namespace, name FROM revision ' + \
        'INNER JOIN page ON page.page_id=revision.rev_page ' + \
        'INNER JOIN namespaces ON page.page_namespace=namespaces.code'
    data = read_sql(query, con)
    con.close()

    # Map IP addresses to country codes
    data['country'] = data['rev_ip'].apply(lambda ip: getCountryCode(ip))
    data['country'] = data['country'].fillna('Unknown')

    # Create country contributions for each page
    con = db.connect('localhost',db_user,db_pass,db_name)
    for page_id, page_revs in data.groupby('page_id'):
        nRevs = len(page_revs)
        cRevs = page_revs.groupby('country').size() / nRevs
    
        # Insert into country_contrib
        # Values: page_id, country (cRevs.keys()), contributions(cRevs.keys())
        df = DataFrame(cRevs, columns=['contribution'])
        df['page_id'] = page_id
        df.to_sql(con=con, name='country_contrib', if_exists='append', flavor='mysql')
    
    con.close()
Example #21
class SQLDFTest(unittest.TestCase):

    def setUp(self):
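        # shared fixtures: a small DataFrame, an environment dict, and sample UDF/UDAF registrations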
        self.default_df = DataFrame(
            [["l1", 1, 2], ["l2", 3, 4], ["l3", 4, 5]], columns=["label", "c1", "c2"])
        self.default_env = {"a": 1, "df": self.default_df}
        self.default_udfs = {"udf1": lambda x: x}

        class udaf1(object):

            def __init__(self):
                self.count = 0

            def step(self, x):
                self.count += 1

            def finalize(self):
                return self.count
        self.default_udafs = {"udaf1": udaf1}

    def tearDown(self):
        pass

    def test_constructor_with_default(self):
        sqldf = SQLDF(self.default_env)
        self.assertEqual(isinstance(sqldf, SQLDF), True)
        self.assertEqual(sqldf.env, self.default_env)
        self.assertEqual(sqldf.inmemory, True)
        self.assertEqual(sqldf._dbname, ":memory:")
        self.assertEqual(sqldf.udfs, {})
        self.assertEqual(sqldf.udafs, {})
        self.assertEqual(isinstance(sqldf.conn, sqlite3.Connection), True)

    def test_constructor_with_assign(self):
        sqldf = SQLDF(
            self.default_env,
            inmemory=False,
            udfs=self.default_udfs,
            udafs=self.default_udafs)
        self.assertEqual(isinstance(sqldf, SQLDF), True)
        self.assertEqual(sqldf.env, self.default_env)
        self.assertEqual(sqldf.inmemory, False)
        self.assertEqual(sqldf._dbname, ".pysqldf.db")
        self.assertEqual(sqldf.udfs, self.default_udfs)
        self.assertEqual(sqldf.udafs, self.default_udafs)
        self.assertEqual(isinstance(sqldf.conn, sqlite3.Connection), True)

    def test_destructor_with_inmemory_db(self):
        sqldf = SQLDF(self.default_env)
        conn = sqldf.conn
        self.assertRaises(
            sqlite3.OperationalError,
            lambda: conn.execute("select * from tbl;"))
        sqldf = None  # destruct
        self.assertRaises(
            sqlite3.ProgrammingError,
            lambda: conn.execute("select * from tbl;"))

    def test_destructor_with_fs_db(self):
        sqldf = SQLDF(self.default_env, inmemory=False)
        conn = sqldf.conn
        self.assertRaises(
            sqlite3.OperationalError,
            lambda: conn.execute("select * from tbl;"))
        self.assertEqual(os.path.exists(".pysqldf.db"), True)
        sqldf = None  # destruct
        self.assertRaises(
            sqlite3.ProgrammingError,
            lambda: conn.execute("select * from tbl;"))
        self.assertEqual(os.path.exists(".pysqldf.db"), False)

    def test_execute_method(self):
        sqldf = SQLDF(self.default_env)
        query = "select * from df;"
        result = sqldf.execute(query)
        assert_frame_equal(result, self.default_df)
        # table deleted
        self.assertRaises(
            sqlite3.OperationalError,
            lambda: sqldf.conn.execute(query))

    def test_execute_method_returning_none(self):
        births = load_births()
        result = SQLDF(locals()).execute(
            "select a from births limit 10;")  # col a not exists
        self.assertEqual(result, None)

    def test_execute_method_with_table_not_found(self):
        sqldf = SQLDF(self.default_env)
        self.assertRaises(
            Exception,
            lambda: sqldf.execute("select * from notable"))
        # table deleted
        self.assertRaises(sqlite3.OperationalError,
                          lambda: sqldf.conn.execute("select * from df;"))

    def test_execute_method_with_query_error(self):
        sqldf = SQLDF(self.default_env)
        self.assertEqual(sqldf.execute("select a from df uuuuuu;"), None)
        # table deleted
        self.assertRaises(sqlite3.OperationalError,
                          lambda: sqldf.conn.execute("select * from df;"))

    def test_extract_table_names_method(self):
        sqldf = SQLDF(self.default_env)
        tablenames = {
            "select * from df;": ["df"],
            "select * from df": ["df"],
            "select * from _": ["_"],
            "select * from 11;": [],
            "select * from 1ab;": [],
            "select * from a-b;": [],
            "select * from a.b;": [],
            "select a;": [],
            "select * from (select * from subq_df) f;": ["subq_df"],
            "select * from df d1 inner join df2 d2 on d1.id = d2.id;": ["df", "df2"],
            "select a, b c from df where c in (select foo from df2 inner join df3 on df2.id = df3.id);": ["df", "df2", "df3"],
            "select * from df where a in (select a from (select c from df2 where c in (select a from df3 inner join df4 on df3.id = df4.id)));": ["df", "df2", "df3", "df4"]
        }
        for query, tablename in tablenames.items():
            self.assertEqual(
                set(sqldf._extract_table_names(query)), set(tablename))

    def test_ensure_data_frame_method_nested_list(self):
        data = [[1, 2, 3], [4, 5, 6]]
        result = SQLDF(locals())._ensure_data_frame(data, "df")
        self.assertEqual(len(result), 2)
        self.assertEqual(list(result.columns), ["c0", "c1", "c2"])
        self.assertEqual(list(result.index), [0, 1])

    def test_ensure_data_frame_method_list_of_tuple(self):
        data = [(1, 2, 3), (4, 5, 6)]
        result = SQLDF(locals())._ensure_data_frame(data, "df")
        self.assertEqual(len(result), 2)
        self.assertEqual(list(result.columns), ["c0", "c1", "c2"])
        self.assertEqual(list(result.index), [0, 1])

    def test_ensure_data_frame_method_nested_tuple(self):
        data = ((1, 2, 3), (4, 5, 6))
        sqldf = SQLDF(locals())
        self.assertRaises(
            Exception,
            lambda: sqldf._ensure_data_frame(
                data,
                "df"))

    def test_ensure_data_frame_method_tuple_of_list(self):
        data = ([1, 2, 3], [4, 5, 6])
        sqldf = SQLDF(locals())
        self.assertRaises(
            Exception,
            lambda: sqldf._ensure_data_frame(
                data,
                "df"))

    def test_ensure_data_frame_method_list_of_dict(self):
        data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]
        result = SQLDF(locals())._ensure_data_frame(data, "df")
        self.assertEqual(len(result), 2)
        self.assertEqual(list(result.columns), ["a", "b", "c"])
        self.assertEqual(list(result.index), [0, 1])

    def test_write_table_method(self):
        df = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"])
        sqldf = SQLDF(locals())
        sqldf._write_table("tbl", df)
        # table created
        cursor = sqldf.conn.cursor()
        sq_type, name, tbl_name, rootpage, sql = list(
            cursor.execute("select * from sqlite_master where type='table';"))[0]
        self.assertEqual(name, "tbl")

    def test_write_table_method_col_with_left_bracket(self):
        df = DataFrame([[1]], columns=["col("])
        sqldf = SQLDF(locals())
        self.assertRaises(Exception, lambda: sqldf._write_table("tbl", df))

    def test_write_table_method_col_with_right_bracket(self):
        df = DataFrame([[1]], columns=["co)l"])
        sqldf = SQLDF(locals())
        self.assertRaises(Exception, lambda: sqldf._write_table("tbl", df))

    def test_write_table_method_garbage_table(self):
        df = [[1, 2], [3, [4]]]
        sqldf = SQLDF(locals())
        self.assertRaises(Exception, lambda: sqldf._write_table("tbl", df))
        # table destroyed
        cursor = sqldf.conn.cursor()
        tablemaster = list(cursor.execute("select * from sqlite_master where type='table';"))
        self.assertEqual(tablemaster, [])

    def test_del_table_method(self):
        sqldf = SQLDF(locals())
        cursor = sqldf.conn.cursor()
        # create table
        cursor.execute("create table deltbl(col);")
        sqldf._del_table(["deltbl"])
        self.assertEqual(
            list(
                cursor.execute("select * from sqlite_master where type='table';")),
            [])

    def test_del_table_method_not_exist_table(self):
        sqldf = SQLDF(locals())
        self.assertRaises(
            sqlite3.OperationalError,
            lambda: sqldf._del_table(
                ["deltblaaaaaaa"]))

    def test_set_udf_method(self):
        sqldf = SQLDF(locals())
        conn = sqldf.conn
        self.default_df.to_sql("df", conn)
        sqldf._set_udf(self.default_udfs)
        self.assertEqual(
            list(
                conn.execute("select udf1(label) from df;")), [
                ("l1",), ("l2",), ("l3",)])

    def test_set_udaf_method_with_agg_class(self):
        sqldf = SQLDF(locals())
        conn = sqldf.conn
        self.default_df.to_sql("df", conn)
        sqldf._set_udaf(self.default_udafs)
        self.assertEqual(
            list(
                conn.execute("select udaf1(label) from df;")), [
                (3,)])

    def test_set_udaf_method_with_agg_function(self):
        sqldf = SQLDF(locals())
        conn = sqldf.conn
        self.default_df.to_sql("df", conn)

        def agg_func(values):
            return len(values)
        sqldf._set_udaf({"mycount": agg_func})
        self.assertEqual(
            list(
                conn.execute("select mycount(label) from df;")), [
                (3,)])

    def test_udf(self):
        data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]

        def ten(x):
            return 10
        result = SQLDF(locals(), udfs={"ten": ten}).execute(
            "SELECT ten(a) AS ten FROM data;")
        self.assertEqual(len(result), 2)
        self.assertEqual(list(result.columns), ["ten"])
        self.assertEqual(list(result.index), [0, 1])
        self.assertEqual(list(result["ten"]), [10, 10])

    def test_udaf(self):
        data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]

        class mycount(object):

            def __init__(self):
                super(mycount, self).__init__()
                self.count = 0

            def step(self, x):
                self.count += x

            def finalize(self):
                return self.count
        result = SQLDF(locals(), udafs={"mycount": mycount}).execute(
            "select mycount(a) as mycount from data;")
        self.assertEqual(len(result), 1)
        self.assertEqual(list(result.columns), ["mycount"])
        self.assertEqual(list(result.index), [0])
        self.assertEqual(list(result["mycount"]), [1 + 4])

    def test_no_table(self):
        self.assertRaises(
            Exception, lambda: SQLDF(
                locals()).execute("select * from notable;"))

    def test_invalid_colname(self):
        data = [{"a": "valid", "(b)": "invalid"}]
        sqldf = SQLDF(locals())
        self.assertRaises(
            Exception,
            lambda: sqldf.execute("select * from data;"))

    def test_db_in_fs(self):
        data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]
        sqldf = SQLDF(locals(), inmemory=False)
        self.assertEqual(os.path.exists(".pysqldf.db"), True)
        sqldf = None  # run GC
        self.assertEqual(os.path.exists(".pysqldf.db"), False)
Example #22
# In[163]:

df


# In[151]:

conn = lite.connect('UN_education.db')
cur = conn.cursor()


# In[164]:

cur.execute('DROP TABLE IF EXISTS school_years')
df.to_sql('school_years', conn)


# In[156]:

cur.execute('SELECT * FROM school_years')
for r in cur:
    print(r)


# In[169]:

print(df.describe())


Example #23
import sqlite3
import pandas as pd
import pandas.io.sql as sql
from pandas import Series, DataFrame

df = DataFrame({'A': [1,2,3]})
#df["id"] = df.index

con = sqlite3.connect("test.db")

df.to_sql("test", con)

Example #24
    for name, params in tables.items():
        new_df = DataFrame()
        print(' -> Clean up {} dataframe'.format(name))
        for year in Years:
            # clean up the table
            clean_func = params[0]
            tmp_df = clean_func(year)

            if new_df.empty:
                new_df = tmp_df
            else:
                new_df = new_df.append(tmp_df, ignore_index=(params[1] == 'index'))

        print(' -> Load {} dataframe to SQL database'.format(name))

        if options.replace:
            if_exists = 'replace'
        else:
            if_exists = 'append'

        sql_options = {'con':sqlEngine, 'if_exists':if_exists,
                'index':True, 'chunksize':100}

        new_df.to_sql(name=name, **sql_options)

        # We want the index to be a primary key to speed things up among other reasons.
        sqlEngine.execute('''ALTER TABLE {}
                ADD PRIMARY KEY (`{}`)'''.format(name, params[1]))

Example #25
# coding:UTF-8
import dota2api

from pandas import DataFrame, Series
import pandas as pd
import numpy as np

import sqlalchemy

from settings import API_KEY
import settings
ACCOUNT_ID = 172282397

engine = sqlalchemy.create_engine("postgresql://{}@localhost/{}".format(settings.PSQL_USER, settings.PSQL_DB))
# engine = sqlalchemy.create_engine('mysql+pymysql://root@localhost/bp_helper_db?charset=utf8')
connection = engine.connect()
api = dota2api.Initialise(API_KEY)

#hero_info
heroes = api.get_heroes(language='zh')["heroes"]
heroes_df = DataFrame(heroes)
heroes_df.to_sql('dota2_hero', engine, if_exists='replace')
#item_info
items = api.get_game_items(language='zh')["items"]
items_df = DataFrame(items)
items_df.to_sql('dota2_item', engine, if_exists='replace')
Example #27
def run():
    global code
    global sell_point

    with sqlite3.connect("backtesting.db") as con:
        cursor = con.cursor()
        
        backtesting_save_data = { 'CODE':[],
                                  'NAME':[],
                                  'STRATEGY':[],
                                  'SELL_PRICE_RATIO':[],
                                  'PORTFOLIO_VALUE':[],
                                  'OPEN':[],
                                  'HIGH':[],
                                  'LOW':[],
                                  'CLOSE':[],
                                  'VOLUME':[],
                                  'PER':[],
                                  'BPS':[],
                                  'PBR':[]}

        for strategys in STRATEGY:
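            # run every strategy/sell-point combination, keeping only the best result per code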
            for code, name in get_code_list_from_analyze(strategys):
                per , bps = get_per_bps_with_code(code)

                open, high, low, close, volume = get_last_data_with_code(code)
                if bps != 0:
                    pbr = close / bps
                else:
                    pbr = 0.0

                get_logger().debug("code : {}. name:{} strategy:{} start".format(code,name,strategys))
                
                last_portfolio = 0
                data = makeBacktestingDataFrame(code)
                for point in SELL_PRICE_RATIO:
                    sell_point = point
                    if strategys == 'MAGC':
                        algo = TradingAlgorithm(capital_base=10000000, initialize=initialize_magc, handle_data=handle_data_magc, identifiers=[code]  )
                        results = algo.run(data)
                    elif strategys == 'MACD':
                        algo = TradingAlgorithm(capital_base=10000000, initialize=initialize_macd, handle_data=handle_data_macd, identifiers=[code]  )
                        results = algo.run(data)
                    elif strategys == 'BBAND':
                        algo = TradingAlgorithm(capital_base=10000000, initialize=initialize_bband, handle_data=handle_data_bband, identifiers=[code]  )
                        results = algo.run(data)
                    
                    portfolio = results['portfolio_value'][-1]
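                    # a better portfolio value replaces the previously stored best row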
                    if last_portfolio < portfolio:
                        if last_portfolio != 0 :
                            backtesting_save_data['CODE'].pop()
                            backtesting_save_data['NAME'].pop()
                            backtesting_save_data['STRATEGY'].pop()
                            backtesting_save_data['SELL_PRICE_RATIO'].pop()
                            backtesting_save_data['PORTFOLIO_VALUE'].pop()
                            backtesting_save_data['OPEN'].pop()
                            backtesting_save_data['HIGH'].pop()
                            backtesting_save_data['LOW'].pop()
                            backtesting_save_data['CLOSE'].pop()
                            backtesting_save_data['VOLUME'].pop()
                            backtesting_save_data['PER'].pop()
                            backtesting_save_data['BPS'].pop()
                            backtesting_save_data['PBR'].pop()

                        backtesting_save_data['CODE'].append(code)
                        backtesting_save_data['NAME'].append(name)
                        backtesting_save_data['STRATEGY'].append(strategys)
                        backtesting_save_data['SELL_PRICE_RATIO'].append('{}'.format(point))
                        backtesting_save_data['PORTFOLIO_VALUE'].append(portfolio)
                        backtesting_save_data['OPEN'].append(open)
                        backtesting_save_data['HIGH'].append(high)
                        backtesting_save_data['LOW'].append(low)
                        backtesting_save_data['CLOSE'].append(close)
                        backtesting_save_data['VOLUME'].append(volume)
                        backtesting_save_data['PER'].append(per)
                        backtesting_save_data['BPS'].append(bps)
                        backtesting_save_data['PBR'].append(pbr)
                        last_portfolio = portfolio

        backtesting_save_df = DataFrame(backtesting_save_data)
               
        backtesting_save_df.to_sql('BACK', con, if_exists='replace', chunksize=1000)
        get_logger().debug("code : {}. name:{} strategy:{} end".format(code,name,strategys))
Example #28
def run():
    with sqlite3.connect("analyze.db") as con:
        cursor = con.cursor()

        code_magc = {'CODE':[],
                     'NAME':[]}
        code_bband= {'CODE':[],
                     'NAME':[]}
        code_macd = {'CODE':[],
                     'NAME':[]}

        code_arrange = {'CODE':[],
                        'NAME':[]}

        per_bps_pbr = {'CODE':[],
                       'NAME':[],
                       'OPEN':[],
                       'HIGH':[],
                       'LOW':[],
                       'CLOSE':[],
                       'VOLUME':[],
                       'PER':[],
                       'BPS':[],
                       'PBR':[],
                       'INDUSTRY_CODE':[],
                       'INDUSTRY':[]}

        for code, name in get_code_list():
            df = makeDataFrame( code )
            if len(df) == 0: continue

            # PER, BPS, PBR table
            per , bps = get_per_bps_with_code(code)
            open, high, low, close, volume = get_last_data_with_code(code)
            if bps != 0:
                pbr = close / bps
            else:
                pbr = 0.0
            
            industry_code, industry_name = get_industry_with_code(code)

            per_bps_pbr['CODE'].append(code)
            per_bps_pbr['NAME'].append(name)
            per_bps_pbr['OPEN'].append(open)
            per_bps_pbr['HIGH'].append(high)
            per_bps_pbr['LOW'].append(low)
            per_bps_pbr['CLOSE'].append(close)
            per_bps_pbr['VOLUME'].append(volume)
            per_bps_pbr['PER'].append(per)
            per_bps_pbr['BPS'].append(bps)
            per_bps_pbr['PBR'].append(pbr)
            per_bps_pbr['INDUSTRY_CODE'].append(industry_code)
            per_bps_pbr['INDUSTRY'].append(industry_name)
            get_logger().debug("{} {} {} {} {} {} {} {} {} {}".format(code,name,open,high,low, close,volume,per,bps,pbr))

            arranged = isArrange( df, 60, 120 )
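            # each screen below also requires PBR <= 1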
            if arranged and pbr <= 1:
                code_arrange['CODE'].append(code)
                code_arrange['NAME'].append(name)
                get_logger().debug("ARRANGE {} {}".format(code, name))
            
            res = isBBandSignal( df, 20 )
            if res and pbr <= 1:
                code_bband['CODE'].append(code)
                code_bband['NAME'].append(name)
                get_logger().debug("BBand lower after up {} {}".format(code, name))

            res = isMAGoldCross( df, 20, 60 )
            if res and arranged and pbr <= 1:
                code_magc['CODE'].append(code)
                code_magc['NAME'].append(name)
                get_logger().debug("MA20, MA60 Golden Cross {} {}".format(code, name))

            res = isMACDSignal( df, 12, 26, 9)
            if res and arranged and pbr <= 1:
                code_macd['CODE'].append(code)
                code_macd['NAME'].append(name)
                get_logger().debug("MACD signal {} {}".format(code, name))

        magc = DataFrame(code_magc)
        bband = DataFrame(code_bband)
        macd = DataFrame(code_macd)
        
        magc.to_sql("MAGC", con, if_exists='replace', chunksize=1000)
        get_logger().debug("MAGC {} saved.".format(len(magc)))
        bband.to_sql("BBAND", con, if_exists='replace', chunksize=1000)
        get_logger().debug("BBAND {} saved.".format(len(bband)))
        macd.to_sql("MACD", con, if_exists='replace', chunksize=1000)
        get_logger().debug("MACD {} saved.".format(len(macd)))

        arrange = DataFrame(code_arrange)
        arrange.to_sql("ARRANGE", con, if_exists='replace', chunksize=1000)
        get_logger().debug("ARRANGE {} saved.".format(len(arrange)))

        per_bps_pbr_df = DataFrame(per_bps_pbr)
        per_bps_pbr_df.to_sql("BPS", con, if_exists='replace', chunksize=1000)
        get_logger().debug("BPS {} saved.".format(len(per_bps_pbr_df)))
Example #29
def load_audit_logs_into_postgres(options):
    session = ValkfleetConnector().db
    engine = WarehouseConnector().db

    log.info('Loading audit points for {start} to {stop} (batches of {n})'.format(
        start=options.start,
        stop=NOW,
        n=options.batch_size)
    )

    # Avoid loading data twice: grab the latest timestamp in the database
    result = engine.execute(sql.statements[1])
    resume_from = options.start

    if result.rowcount:
        last_timestamp = list(result)[0][0] + timedelta(milliseconds=1)
        if last_timestamp > options.start:
            resume_from = last_timestamp
            log.info('Resuming at %s', resume_from)

    cursor = None
    more = True
    batch_counter = 0
    batch_max_timestamp = None

    while more:
        url = ENDPOINT + '?filter=timestamp gt {start}&orderby=timestamp&batch_size={batch_size}'.format(
            start=resume_from.isoformat(),
            batch_size=options.batch_size
        )
        if cursor:
            url += '&cursor=%s' % cursor

        log.info('Requesting %s', url)
        response = session.get(url)
        json = response.json()

        if 'error' not in json.keys():
            records = json['items']
            more = json['more']
            cursor = json['cursor']

            def add_foreign_key(records_):
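                # keep only non-'acceptRoute-clicked' records, promoting the nested delivery uuid to a column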
                for record in records_:
                    if record['event'] != 'acceptRoute-clicked':
                        record.update({'delivery_uuid': record['metadata']['delivery']})
                        del record['metadata']
                        yield record

            fc_records = list(add_foreign_key(records))
            batch = DataFrame().from_records(fc_records)

            if batch.empty:
                log.warning('Batch %s is empty', batch_counter)

            batch.to_sql(TABLE, engine,
                         schema=SCHEMA,
                         if_exists='append',
                         index=False)

            # The time columns are actually strings
            batch_min_timestamp = batch['timestamp'].min()[:19]
            batch_max_timestamp = batch['timestamp'].max()[:19]

            batch_counter += 1
            kwargs = dict(schema=SCHEMA,
                          table=TABLE,
                          n=batch_counter,
                          records=batch.shape[0],
                          fields=batch.shape[1],
                          min=batch_min_timestamp,
                          max=batch_max_timestamp)
            log.info('Loaded batch {n} into table {schema}.{table} '
                     '({records} records, {fields} fields):'
                     '{min} to {max}'.format(**kwargs))

        else:
            message = 'Lost the cursor on batch {n} ({time}): {status} {error}'.format(
                n=batch_counter,
                time=batch_max_timestamp,
                status=response.status_code,
                error=response.json()
            )
            log.error(message, exc_info=True)
            exit(message)

    log.info('Finished loading audit points for %s to %s', options.start, NOW)
Example #30
if not (args.apps or args.tests or args.extra or args.conv):
    benches = apps
else:
    if args.apps:
        benches.extend(apps)
    if args.tests:
        benches.extend(tests)
    if args.conv:
        benches.extend(conv)
    if args.extra:
        benches.extend(args.extra)

benches = [a for a in benches if a not in disabled]

print('Loading:\n  ' + '\n  '.join(benches))

res = DataFrame()

for app in benches:
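    # skip apps whose benchmark output is missing or cannot be parsed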
    try:
        res = res.append(ingest(app))
    except Exception:  # originally: except IOError, e
        print('Skipping missing or malformed: ' + app)
    #except:
    #    print '\n\nFailed on',app,'\n'
    #    raise

db = create_engine('sqlite:///benchmarks.db')
res.to_sql('benchmarks', db)
res.to_csv('benchmarks.csv')