Example 1
    def test_if_exists(self):
        _skip_if_no_MySQLdb()
        df_if_exists_1 = DataFrame({'col1': [1, 2], 'col2': ['A', 'B']})
        df_if_exists_2 = DataFrame({'col1': [3, 4, 5], 'col2': ['C', 'D', 'E']})
        table_name = 'table_if_exists'
        sql_select = "SELECT * FROM %s" % table_name

        def clean_up(test_table_to_drop):
            """
            Drops tables created from individual tests
            so no dependencies arise from sequential tests
            """
            if sql.table_exists(test_table_to_drop, self.db, flavor='mysql'):
                cur = self.db.cursor()
                cur.execute("DROP TABLE %s" % test_table_to_drop)
                cur.close()

        # test if invalid value for if_exists raises appropriate error
        self.assertRaises(ValueError,
                          sql.write_frame,
                          frame=df_if_exists_1,
                          con=self.db,
                          name=table_name,
                          flavor='mysql',
                          if_exists='notvalidvalue')
        clean_up(table_name)

        # test if_exists='fail'
        sql.write_frame(frame=df_if_exists_1, con=self.db, name=table_name,
                        flavor='mysql', if_exists='fail')
        self.assertRaises(ValueError,
                          sql.write_frame,
                          frame=df_if_exists_1,
                          con=self.db,
                          name=table_name,
                          flavor='mysql',
                          if_exists='fail')

        # test if_exists='replace'
        sql.write_frame(frame=df_if_exists_1, con=self.db, name=table_name,
                        flavor='mysql', if_exists='replace')
        self.assertEqual(sql.tquery(sql_select, con=self.db),
                         [(1, 'A'), (2, 'B')])
        sql.write_frame(frame=df_if_exists_2, con=self.db, name=table_name,
                        flavor='mysql', if_exists='replace')
        self.assertEqual(sql.tquery(sql_select, con=self.db),
                         [(3, 'C'), (4, 'D'), (5, 'E')])
        clean_up(table_name)
                        
        # test if_exists='append'
        sql.write_frame(frame=df_if_exists_1, con=self.db, name=table_name,
                        flavor='mysql', if_exists='fail')
        self.assertEqual(sql.tquery(sql_select, con=self.db),
                         [(1, 'A'), (2, 'B')])
        sql.write_frame(frame=df_if_exists_2, con=self.db, name=table_name,
                        flavor='mysql', if_exists='append')
        self.assertEqual(sql.tquery(sql_select, con=self.db),
                         [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')])
        clean_up(table_name)
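For reference, the three modes exercised above are the whole if_exists contract of the legacy pandas.io.sql.write_frame API (later superseded by DataFrame.to_sql). A minimal, self-contained sketch of the same semantics against an in-memory SQLite database, so no MySQL server is needed:

# A sketch of the if_exists semantics tested above, using sqlite3 so it runs
# without a MySQL server. Assumes a legacy pandas that still ships write_frame;
# on modern pandas, DataFrame.to_sql(..., if_exists=...) behaves the same way.
import sqlite3
from pandas import DataFrame
import pandas.io.sql as sql

con = sqlite3.connect(':memory:')
df1 = DataFrame({'col1': [1, 2], 'col2': ['A', 'B']})
df2 = DataFrame({'col1': [3, 4, 5], 'col2': ['C', 'D', 'E']})

sql.write_frame(df1, con=con, name='t', if_exists='fail')     # creates t
sql.write_frame(df2, con=con, name='t', if_exists='replace')  # drops t and rewrites it
sql.write_frame(df1, con=con, name='t', if_exists='append')   # appends rows to t
print(sql.tquery("SELECT * FROM t", con=con))                 # df2 rows, then df1 rows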
Example 2
def update_output_fields_table(masterDbConn, runPath):
    '''
    Updates the outputFields table in the master run database. If a
    field already exists it is skipped; otherwise it is added.
    
    Parameters
    ----------
    masterDbConn : sqlite connection object
        master database to connect to
    runPath : string
        path to the run folder for the apsimData.sqlite database for a 
        particular run
        
    Returns
    -------
    Nothing.
    '''
    
    # get the run database path
    apsimDbPath = os.path.join(runPath, 'data', 'apsimData.sqlite')
    
    # open run database
    apsimDbConn = lite.connect(apsimDbPath)
    
    with apsimDbConn:
        # read data from the outputFields table
        outputFields = psql.read_frame("SELECT * FROM outputFields;", apsimDbConn)
        
    with masterDbConn:
        # write outputFields to master database
        try:
            psql.write_frame(outputFields, 'outputFields', masterDbConn)
        except ValueError:
            # the table already exists, so skip the write
            pass
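A less exception-driven variant would probe for the table first with psql.table_exists, which the same legacy module provides (see Examples 31 and 32). A sketch, assuming the same outputFields frame and masterDbConn as above:

# Sketch: check for the table before writing instead of catching ValueError.
if not psql.table_exists('outputFields', masterDbConn, flavor='sqlite'):
    psql.write_frame(outputFields, 'outputFields', masterDbConn)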
Example 3
def save_repos(count=1000000):
    """ to get list of repos .... 
    the count is the number of requests to the API. Each request returns 100 repos
    to 'resume' this after already adding records to table, 
    add ?since=x where x is the last ID in your table.
    I'm only saving 6 variables right now because most of the variables returned 
    are URLs that follow a set structure and therefore could be easily built from the full_name'
    """

    con = MySQLdb.connect("localhost", USER, PASSWORD, "git", charset='utf8')
    url = 'https://api.github.com/repositories'
    for x in xrange(1,count):
        req = requests.get(url,auth=(USER,PASSWORD))
        url = req.links['next']['url']
        df_temp = pn.DataFrame()
        if(req.ok):
            repoItem = req.json
            repos_df_temp = pn.DataFrame.from_dict(repoItem)
            df_temp = repos_df_temp[['id','name','private','full_name','description','fork']]
            df_temp = df_temp.fillna('')		
            sql.write_frame(df_temp, con=con, name='repos', 
                if_exists='append', flavor='mysql')
        print 'fetched 100 rows'
        time.sleep(1.0)
    return df_temp
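As the docstring hints, a crawl can be resumed by seeding the first request with ?since=<last id>. A hedged sketch of that seeding step (last_id is a placeholder; the original code does not compute it):

# Hypothetical resume logic for save_repos: start the paginated walk from the
# highest repository id already stored, e.g. via "SELECT MAX(id) FROM repos".
last_id = 123456789  # placeholder value, not from the original code
url = 'https://api.github.com/repositories?since={}'.format(last_id)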
Example 4
def save_db(df, genedb_path):
    """Saves the data into the gene_features table.

    If the table already exists, it is dropped and then
    re-created.

    **Parameters**

    df : pd.DataFrame
        data to insert into DB table
    genedb_path : str
        path to sqlite db
    """
    logger.debug('Dropping gene_features table IF EXISTS.')
    _utils.drop_table('gene_features', genes_db_path=genedb_path, kind='sqlite')  # drop table if exists
    logger.debug('After dropping gene_features table IF EXISTS.')

    logger.info('Saving gene_features table ...')
    conn = sqlite3.connect(genedb_path)  # open connection
    # save to sqlite3 database
    psql.write_frame(df,  # pandas dataframe
                     'gene_features',  # table name
                     con=conn,  # connection
                     flavor='sqlite',  # use sqlite
                     if_exists='replace')  # drop table if exists
    conn.close()
    logger.info('Finished saving gene_features table.')
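write_frame was deprecated and eventually removed from pandas, so on current versions the drop-and-replace above maps onto DataFrame.to_sql. A sketch assuming the same df and genedb_path:

# Modern equivalent: DataFrame.to_sql replaces the legacy psql.write_frame.
# Assumes the same df and genedb_path as in save_db above.
import sqlite3

conn = sqlite3.connect(genedb_path)
df.to_sql('gene_features', conn, if_exists='replace', index=False)
conn.close()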
Example 5
def save_db(df, genedb_path):
    """Saves the data into the gene_features table.

    If the table already exists, it is dropped and then
    re-created.

    **Parameters**

    df : pd.DataFrame
        data to insert into DB table
    genedb_path : str
        path to sqlite db
    """
    logger.debug('Dropping gene_features table IF EXISTS.')
    _utils.drop_table('gene_features',
                      genes_db_path=genedb_path,
                      kind='sqlite')  # drop table if exists
    logger.debug('After dropping gene_features table IF EXISTS.')

    logger.info('Saving gene_features table ...')
    conn = sqlite3.connect(genedb_path)  # open connection
    # save to sqlite3 database
    psql.write_frame(
        df,  # pandas dataframe
        'gene_features',  # table name
        con=conn,  # connection
        flavor='sqlite',  # use sqlite
        if_exists='replace')  # drop table if exists
    conn.close()
    logger.info('Finished saving gene_features table.')
Example 6
def setup_test_data():
    """Function uses sample githubarchive data.
    It saves a few hundred copies as a csv, as a python pickle file,
    as a hdf5 store, as mysql table and as mondodb.
    If you haven't run  timing tests before, you will need
    to run this first.
    """
    print 'use one hour of sample and replicate 100 times'
    #use only the repository data --
    onehr_df = ghd.load_local_archive_dataframe()
    onehr_json = ghd.load_local_archive_json()
    one_hr_repo_df = ghd.unnest_git_json(onehr_df)['repository']
    many_hr_repo_df = pn.DataFrame()
    for i in range(1,100):
        many_hr_repo_df = many_hr_repo_df.append(one_hr_repo_df)
    print('saving dataframe with', many_hr_repo_df.shape, "rows")
    print 'saving data to a csv file'
    many_hr_repo_df.to_csv('data/oneday.csv', encoding='utf-8')
    print 'dumping data to python pickle'
    pickle.dump(many_hr_repo_df, open('data/oneday.pyd', 'wb'))
    print 'dumping data to mysql database'
    con = mysql_setup()
    many_hr_repo_df_clean = many_hr_repo_df.fillna('')
    sql.write_frame(many_hr_repo_df_clean, 'oneday', con, 'mysql')
    print 'saving data to hdf5 filestore'
    store = pyt.HDFStore('data/git.h5')
    store.put('oneday', many_hr_repo_df)
    print 'saving data to mongodb'
    # repos_son = onehr_df['repository']
    many_hr_repo_df = many_hr_repo_df.dropna()    
    client = MongoClient()
    dbm = client['git']
    collection = dbm['gittest']
    # many_hr_repo_df = many_hr_repo_df.set_index(many_hr_repo_df.name)
    [collection.insert(onehr_json) for i in range(1,100)]
Example 7
    def test_keyword_as_column_names(self):
        _skip_if_no_MySQLdb()
        df = DataFrame({'From': np.ones(5)})
        sql.write_frame(df, con=self.db, name='testkeywords',
                        if_exists='replace', flavor='mysql')
Example 8
    def _check_roundtrip(self, frame):
        _skip_if_no_MySQLdb()
        drop_sql = "DROP TABLE IF EXISTS test_table"
        cur = self.db.cursor()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "Unknown table.*")
            cur.execute(drop_sql)
        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
        result = sql.read_frame("select * from test_table", self.db)

        # HACK! Change this once indexes are handled properly.
        result.index = frame.index
        result.index.name = frame.index.name

        expected = frame
        tm.assert_frame_equal(result, expected)

        frame['txt'] = ['a'] * len(frame)
        frame2 = frame.copy()
        index = Index(lrange(len(frame2))) + 10
        frame2['Idx'] = index
        drop_sql = "DROP TABLE IF EXISTS test_table2"
        cur = self.db.cursor()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "Unknown table.*")
            cur.execute(drop_sql)
        sql.write_frame(frame2, name='test_table2', con=self.db, flavor='mysql')
        result = sql.read_frame("select * from test_table2", self.db,
                                index_col='Idx')
        expected = frame.copy()

        # HACK! Change this once indexes are handled properly.
        expected.index = index
        expected.index.names = result.index.names
        tm.assert_frame_equal(expected, result)
Example 9
    def _check_roundtrip(self, frame):
        _skip_if_no_MySQLdb()
        drop_sql = "DROP TABLE IF EXISTS test_table"
        cur = self.db.cursor()
        cur.execute(drop_sql)
        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
        result = sql.read_frame("select * from test_table", self.db)

        # HACK!
        result.index = frame.index

        expected = frame
        tm.assert_frame_equal(result, expected)

        frame['txt'] = ['a'] * len(frame)
        frame2 = frame.copy()
        frame2['Idx'] = Index(list(range(len(frame2)))) + 10
        drop_sql = "DROP TABLE IF EXISTS test_table2"
        cur = self.db.cursor()
        cur.execute(drop_sql)
        sql.write_frame(frame2,
                        name='test_table2',
                        con=self.db,
                        flavor='mysql')
        result = sql.read_frame("select * from test_table2",
                                self.db,
                                index_col='Idx')
        expected = frame.copy()
        expected.index = Index(list(range(len(frame2)))) + 10
        tm.assert_frame_equal(expected, result)
Example 10
    def test_legacy_write_frame(self):
        """Test the legacy write_frame name.
        Assume the functionality is already tested above, so just do a
        quick check that it basically works."""
        sql.write_frame(
            self.test_frame1, 'test_frame_legacy', self.conn, flavor='sqlite')
        self.assertTrue(
            sql.has_table('test_frame_legacy', self.conn, flavor='sqlite'),
            'Table not written to DB')
Example 11
def save_db(maf_path, db_path, hypermutator_count):
    # merge all data frames together with the first
    # data frames given priority over later data frames
    df_cols = ['Gene_Symbol', 'Tumor_Sample', 'Tumor_Type', 'Chromosome',
               'Start_Position', 'End_Position', 'Variant_Classification',
               'Reference_Allele', 'Tumor_Allele', 'Protein_Change']
    df = pd.DataFrame(columns=df_cols)
    for single_maf in maf_path.split(','):
        tmp_df = pd.read_csv(single_maf, sep='\t')
        samp_names = set(df['Tumor_Sample'].tolist())
        tmp_df = tmp_df[tmp_df['Tumor_Sample'].apply(lambda x: x not in samp_names)]
        df = pd.concat([df, tmp_df])

    _utils.drop_table('maf_mutation', db_path, kind='sqlite')
    conn = sqlite3.connect(db_path)  # open connection

    # save tsv to sqlite3 database
    psql.write_frame(df,  # pandas dataframe
                     'maf_mutation',  # table name
                     con=conn,  # connection
                     flavor='sqlite',  # use sqlite
                     if_exists='replace')  # drop table if exists

    # filter hypermutator samples
    filter_hypermutators(hypermutator_count, conn, db_path)
Example 12
def save_repos(count=1000000):
    """ to get list of repos .... 
    the count is the number of requests to the API. Each request returns 100 repos
    to 'resume' this after already adding records to table, 
    add ?since=x where x is the last ID in your table.
    I'm only saving 6 variables right now because most of the variables returned 
    are URLs that follow a set structure and therefore could be easily built from the full_name'
    """

    con = MySQLdb.connect("localhost", USER, PASSWORD, "git", charset='utf8')
    url = 'https://api.github.com/repositories'
    for x in xrange(1, count):
        req = requests.get(url, auth=(USER, PASSWORD))
        url = req.links['next']['url']
        df_temp = pn.DataFrame()
        if (req.ok):
            repoItem = req.json
            repos_df_temp = pn.DataFrame.from_dict(repoItem)
            df_temp = repos_df_temp[[
                'id', 'name', 'private', 'full_name', 'description', 'fork'
            ]]
            df_temp = df_temp.fillna('')
            sql.write_frame(df_temp,
                            con=con,
                            name='repos',
                            if_exists='append',
                            flavor='mysql')
        print 'fetched 100 rows'
        time.sleep(1.0)
    return df_temp
Example 13
def create_tables(masterDbConn, gridLut):
    '''
    Creates each of the tables in the master run database.
    
    Parameters
    ----------
    masterDbConn : sqlite connection object
        master database to connect to
    gridLut : pandas dataframe
        contains the grid information (point_id, lat, lon, county, etc.)
    
    Returns
    -------
    Nothing.
    '''
    with masterDbConn:
        # create runParameters table
        sql = "CREATE TABLE runParameters (run_id INTEGER PRIMARY KEY, met TEXT, crop TEXT, resolution REAL, clock_start TEXT, clock_end TEXT, crit_fr_asw REAL, sow_start TEXT, sow_end TEXT, harvest_date TEXT, soil_name TEXT)"
        masterDbConn.execute(sql)
        
        # create apsimOutput table
        # handled in update_apsim_output_table()
        
        # create outputFields table
        # handled in update_output_fields_table()
        
        # create gridPoints table
        psql.write_frame(gridLut, 'gridPoints', masterDbConn)
Example 14
def save_db(maf_path, db_path, hypermutator_count):
    # merge all data frames together with the first
    # data frames given priority over later data frames
    df_cols = [
        'Gene_Symbol', 'Tumor_Sample', 'Tumor_Type', 'Chromosome',
        'Start_Position', 'End_Position', 'Variant_Classification',
        'Reference_Allele', 'Tumor_Allele', 'Protein_Change'
    ]
    df = pd.DataFrame(columns=df_cols)
    for single_maf in maf_path.split(','):
        tmp_df = pd.read_csv(single_maf, sep='\t')
        samp_names = set(df['Tumor_Sample'].tolist())
        tmp_df = tmp_df[tmp_df['Tumor_Sample'].apply(
            lambda x: x not in samp_names)]
        df = pd.concat([df, tmp_df])

    _utils.drop_table('maf_mutation', db_path, kind='sqlite')
    conn = sqlite3.connect(db_path)  # open connection

    # save tsv to sqlite3 database
    psql.write_frame(
        df,  # pandas dataframe
        'maf_mutation',  # table name
        con=conn,  # connection
        flavor='sqlite',  # use sqlite
        if_exists='replace')  # drop table if exists

    # filter hypermutator samples
    filter_hypermutators(hypermutator_count, conn, db_path)
Example 15
    def _check_roundtrip(self, frame):
        _skip_if_no_MySQLdb()
        drop_sql = "DROP TABLE IF EXISTS test_table"
        cur = self.db.cursor()
        cur.execute(drop_sql)
        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
        result = sql.read_frame("select * from test_table", self.db)

        # HACK!
        result.index = frame.index

        expected = frame
        tm.assert_frame_equal(result, expected)

        frame['txt'] = ['a'] * len(frame)
        frame2 = frame.copy()
        frame2['Idx'] = Index(list(range(len(frame2)))) + 10
        drop_sql = "DROP TABLE IF EXISTS test_table2"
        cur = self.db.cursor()
        cur.execute(drop_sql)
        sql.write_frame(frame2, name='test_table2', con=self.db, flavor='mysql')
        result = sql.read_frame("select * from test_table2", self.db,
                                index_col='Idx')
        expected = frame.copy()
        expected.index = Index(list(range(len(frame2)))) + 10
        tm.assert_frame_equal(expected, result)
Example 16
    def test_uquery(self):
        try:
            import MySQLdb
        except ImportError:
            raise nose.SkipTest
        frame = tm.makeTimeDataFrame()
        drop_sql = "DROP TABLE IF EXISTS test_table"
        cur = self.db.cursor()
        cur.execute(drop_sql)
        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
        stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)'
        self.assertEqual(sql.uquery(stmt, con=self.db), 1)

        try:
            sys.stdout = StringIO()

            self.assertRaises(MySQLdb.ProgrammingError,
                              sql.tquery,
                              'insert into blah values (1)',
                              con=self.db)

            self.assertRaises(MySQLdb.ProgrammingError,
                              sql.tquery,
                              'insert into blah values (1)',
                              con=self.db,
                              retry=True)
        finally:
            sys.stdout = sys.__stdout__
Example 17
    def test_tquery(self):
        try:
            import MySQLdb
        except ImportError:
            raise nose.SkipTest
        frame = tm.makeTimeDataFrame()
        drop_sql = "DROP TABLE IF EXISTS test_table"
        cur = self.db.cursor()
        cur.execute(drop_sql)
        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
        result = sql.tquery("select A from test_table", self.db)
        expected = frame.A
        result = Series(result, frame.index)
        tm.assert_series_equal(result, expected)

        try:
            sys.stdout = StringIO()
            self.assertRaises(MySQLdb.ProgrammingError,
                              sql.tquery,
                              'select * from blah',
                              con=self.db)

            self.assertRaises(MySQLdb.ProgrammingError,
                              sql.tquery,
                              'select * from blah',
                              con=self.db,
                              retry=True)
        finally:
            sys.stdout = sys.__stdout__
Example 18
def load_company_basic_info():
    # Download basic company info: stock codes, PE ratios and similar fields
    try:
        rs = ts.get_stock_basics()
        sql.write_frame(rs, "company_basic_info", con=conn_company_classified,
                        flavor='mysql', if_exists='replace', index=True)
        print("Company basic info saved OK")
    except:
        print("Error saving company basic info")
Example 19
def import_to_db(conn, df):
    try:
        psql.write_frame(df, 'locallists', conn,
                flavor="mysql", if_exists='append', index=None)
        conn.commit()
    finally:
        conn.close()
    print 'mysql done'
Example 20
    def create_bottom_100_all_time(self, bottom_100_all_time):
        """bottom_100_all_time should be a pandas DataFrame that comes from
        the function reviewskimmer.imdb.charts.get_bottom_100_all_time."""
        psql.write_frame(bottom_100_all_time,
                         con=self.db,
                         name='rs_bottom_100_all_time',
                         if_exists='replace',
                         flavor='mysql')
Example 21
def main():
  infile = "./all_players.csv"
  db = connect()[0]
  cur = connect()[1]
  cur.execute("USE fantasy_lineups;")
  df = pd.read_csv(infile)
  sql.write_frame(df, con=db, name='players', if_exists='replace', flavor='mysql')
  cur.close()
  db.close()
Example 22
    def _check_roundtrip(self, frame):
        sql.write_frame(frame, name='test_table', con=self.db)
        result = sql.read_frame("select * from test_table", self.db)

        # HACK!
        result.index = frame.index

        expected = frame
        tm.assert_frame_equal(result, expected)
Example 23
    def _check_roundtrip(self, frame):
        sql.write_frame(frame, name='test_table', con=self.db)
        result = sql.read_frame("select * from test_table", self.db)

        # HACK!
        result.index = frame.index

        expected = frame
        tm.assert_frame_equal(result, expected)
Example 24
def _write_table(tablename, df, conn):
    "writes a dataframe to the sqlite database"

    for col in df.columns:
        if re.search("[() ]", col):
            msg = "please follow SQLite column naming conventions: "
            msg += "http://www.sqlite.org/lang_keywords.html"
            raise Exception(msg)

    write_frame(df, name=tablename, con=conn, flavor='sqlite')
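A quick illustration of the guard: a column name containing a space (or parentheses) is rejected before anything touches the database. The frame below is hypothetical:

# Hypothetical usage of _write_table; the spaced column name trips the guard.
import sqlite3
import pandas as pd

conn = sqlite3.connect(':memory:')
df_bad = pd.DataFrame({'height (m)': [10.2, 11.5]})
_write_table('trees', df_bad, conn)  # raises: column name breaks SQLite naming rules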
Example 25
def _write_table(tablename, df, conn):
    "writes a dataframe to the sqlite database"

    for col in df.columns:
        if re.search("[() ]", col):
            msg = "please follow SQLite column naming conventions: "
            msg += "http://www.sqlite.org/lang_keywords.html"
            raise Exception(msg)

    write_frame(df, name=tablename, con=conn, flavor='sqlite')
Example 26
    def saveDFToDB(self, results, table_name=None):
        # NOTE: always drop?
        if not table_name:
            table_name = '_'.join(('dataframe', dt.datetime.strftime(dt.datetime.now(), format='%d%m%Y')))
        self._log.info('Dropping previous table')
        self.execute('drop table if exists %s' % table_name)
        results['date'] = results.index
        pd_sql.write_frame(results, table_name, self._connection)
        return table_name
Example 27
def update_apsim_output_table(masterDbConn, runPath, update):
    '''
    Updates the apsimOutput table in the master run database. If a run
    is already there it is updated, otherwise it is added.
    
    Parameters
    ----------
    masterDbConn : sqlite connection object
        master database to connect to
    runPath : string
        path to the run folder for the apsimData.sqlite database for a 
        particular run
    update : bool
        if the database needs to be updated or if it is the first commit for a
        particular run
        
    Returns
    -------
    Nothing.
    '''
    # get the runId
    runId = int(os.path.split(runPath)[1])
    
    # don't do anything if the database is being updated
    if update:
        print "*** Warning: Run {} data may already exist. Skipping write.".format(runId)
        return
    
    # get sow start from parameters table
    sql = "SELECT sow_start FROM runParameters WHERE run_id = {}".format(runId)
    sowStart = psql.read_frame(sql, masterDbConn).ix[0][0]
    
    # check to see if sow date is auto (determined from lookup table)
    if sowStart == 'auto':
        # read sow start for each location
        sql = "SELECT point_id, sow_start FROM gridPoints"
        sowDates = psql.read_frame(sql, masterDbConn, index_col='point_id')
    else:
        # set sow start the same for each location
        sql = "SELECT point_id FROM gridPoints"
        gridPoints = psql.read_frame(sql, masterDbConn)
        sowDates = pandas.DataFrame([sowStart] * len(gridPoints), index=gridPoints['point_id'])
    
    # get the run database path
    apsimDbPath = os.path.join(runPath, 'data', 'apsimData.sqlite')
    
    # read and convert to yearly formatted data
    apsimData = _apsim_output(apsimDbPath, sowDates)
    
    # add column with runId
    runIdSeries = pandas.Series([runId] * len(apsimData))
    apsimData['run_id'] = runIdSeries
    
    # write runData to master database
    psql.write_frame(apsimData, 'apsimOutput', masterDbConn, if_exists='append')
Example 28
def main():
    # write_sched_csv()
    infile = "./schedules.csv"
    db = connect()[0]
    cur = connect()[1]
    cur.execute("USE fantasy_lineups;")
    df = pd.read_csv(infile)
    sql.write_frame(df, con=db, name="schedules", if_exists="replace", flavor="mysql")

    cur.close()
    db.close()
Example 29
def main():
  field_names = ['game_id','plyr_id','name','is_starter']
  infile = "./past_starter_data.csv"
  db = connect()[0]
  cur = connect()[1]
  cur.execute("USE fantasy_lineups;")
  df = pd.read_csv(infile)
  sql.write_frame(df, con=db, name='past_starters', if_exists='replace', flavor='mysql')
   
  cur.close()
  db.close()
Example 30
    def saveDFToDB(self, results, table_name=None):
        # NOTE: always drop?
        if not table_name:
            table_name = '_'.join(('dataframe',
                                   dt.datetime.strftime(dt.datetime.now(),
                                                        format='%d%m%Y')))
        self._log.info('Dropping previous table')
        self.execute('drop table if exists %s' % table_name)
        results['date'] = results.index
        pd_sql.write_frame(results, table_name, self._connection)
        return table_name
Example 31
def toDB(con, table, tableName):
    
    #Drop table if it exists
    if psql.table_exists(tableName, con, flavor='sqlite'):
        cur = con.cursor()
        sql = 'DROP TABLE "main"."{}"'.format(tableName)
        cur.execute(sql)
        con.commit()
    
    #Write to db    
    psql.write_frame(table, tableName , con)
    con.commit()
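The manual drop-then-write in toDB can also be collapsed into the if_exists='replace' mode used elsewhere on this page. A sketch with the same arguments:

# Sketch: one call instead of an explicit DROP TABLE followed by write_frame.
psql.write_frame(table, tableName, con, flavor='sqlite', if_exists='replace')
con.commit()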
Example 32
def toDB(con, table, tableName):

    #Drop table if it exists
    if psql.table_exists(tableName, con, flavor='sqlite'):
        cur = con.cursor()
        sql = 'DROP TABLE "main"."{}"'.format(tableName)
        cur.execute(sql)
        con.commit()

    #Write to db
    psql.write_frame(table, tableName, con)
    con.commit()
Example 33
def main():
  field_names = ['plyr_id', 'proj_pts','week']
  infile = "./espn-proj2.csv"
  db = connect()[0]
  cur = connect()[1]
  cur.execute("USE fantasy_lineups;")
  df = pd.read_csv(infile)
  sql.write_frame(df, con=db, name='espn_projections', if_exists='replace', flavor='mysql')
   
  cur.close()
  del cur
  db.close()
  del db  
Example 34
def main():
  field_names = ['plyr_id', 'name', 'position', 'is_starter']
  infile = "./starters_2013.csv"
  db = connect()[0]
  cur = connect()[1]
  cur.execute("USE fantasy_lineups;")
  df = pd.read_csv(infile)
  sql.write_frame(df, con=db, name='current_starters', if_exists='replace', flavor='mysql')
   
  cur.close()
  del cur
  db.close()
  del db  
Example 35
def main():
  field_names = ['game_id','coach_id', 'team', 'is_home', 'year']
  infile = "./coach_data.csv"
  db = connect()[0]
  cur = connect()[1]
  cur.execute("USE fantasy_lineups;")
  df = pd.read_csv(infile)
  sql.write_frame(df, con=db, name='coaches', if_exists='replace', flavor='mysql')
   
  cur.close()
  del cur
  db.close()
  del db  
Example 36
    def writeFrameToDB(self, df, SeriesName):
        # Write to db
        try:
            self.connect()
            psql.write_frame(df, SeriesName, self.con,
                             if_exists='append', safe_names=False)
            self.con.commit()
            logging.info("Wrote series {}".format(SeriesName))
        except:
            logging.error("Problems with {}".format(SeriesName))
            raise
        finally:
            self.disconnect()
Example 37
def main():
  field_names = ['plyr_id', 'name', 'position', 'points', 'week']
  infile = "./fantasy_full_season.csv"
  db = connect()[0]
  cur = connect()[1]
  cur.execute("USE fantasy_lineups;")
  df = pd.read_csv(infile)
  sql.write_frame(df, con=db, name='all_predictions', if_exists='replace', flavor='mysql')
   
  cur.close()
  del cur
  db.close()
  del db  
Example 38
def write_final(dirname, work, final, extract_methods):
    df = extract_data(work)

    if 'csv' in extract_methods:
        csv = os.path.join(final, dirname + ".csv")
        df.to_csv(csv, index=False, header=True)
        print "\tSUCCESS: Extracted data from .out file. CSV written to ./final/%s.csv" % dirname

    if 'sqlite3' in extract_methods:
        db_path = os.path.join(final, "data.db")
        conn = sqlite3.connect(
            db_path, timeout=10)  # 10 seconds to avoid write deadlock?
        try:
            sqlio.write_frame(df,
                              name='trees_fvsaggregate',
                              con=conn,
                              flavor='sqlite',
                              if_exists='append')
        except sqlite3.IntegrityError as e:
            if e.message.endswith("are not unique"):
                # try to drop and rerun
                cursor = conn.cursor()

                delete_sql = """DELETE FROM trees_fvsaggregate
                  WHERE var = '%(var)s'
                  AND rx = %(rx)d
                  AND cond = %(cond)d
                  AND site = %(site)d
                  AND climate = '%(climate)s'
                """ % df.irow(0)  # assume the dataframe has the same data

                res = cursor.execute(delete_sql)
                if res.rowcount > 0:
                    print "\tNOTICE : Deleting %d old rows from ./final/data.db" % res.rowcount

                # try again
                sqlio.write_frame(df,
                                  name='trees_fvsaggregate',
                                  con=conn,
                                  flavor='sqlite',
                                  if_exists='append')

            else:
                # something else went wrong
                conn.rollback()
                raise sqlite3.IntegrityError(e.message)

        conn.commit()
        conn.close()
        print "\tSUCCESS: Extracted data from .out file. Row appended to ./final/data.db"
Example 39
def writeToDb(data, db_conn):
    """
    Take the list of results and write to sqlite database
    """
    data_frame = pd.DataFrame(data)
    data_frame['scrape_time'] = strftime("%Y%m%d%H%M%S", gmtime())
    pdsql.write_frame(
        data_frame,
        "bikedata",
        db_conn,
        flavor="sqlite",
        if_exists="append",
    )
    db_conn.commit()
Example 40
    def test_onecolumn_of_integer(self):
        # GH 3628
        # a column_of_integers dataframe should transfer well to sql

        mono_df = DataFrame([1, 2], columns=['c0'])
        sql.write_frame(mono_df, con=self.db, name='mono_df')
        # computing the sum via sql
        con_x = self.db
        the_sum = sum(my_c0[0] for my_c0 in con_x.execute("select * from mono_df"))
        # it should not fail, and gives 3 (GH 3628)
        self.assertEqual(the_sum, 3)

        result = sql.read_frame("select * from mono_df", con_x)
        tm.assert_frame_equal(result, mono_df)
Example 41
 def importData():
     
     #Start Time
     start = datetime(2010,1,1)
     end = datetime.date(datetime.now())
     data = DataReader(sp500constituents[0], "yahoo", start, end)
     
     
     en = enumerate(sp500constituents)
     [i for i, x in en if x=='WFMI']
     
     
     sp500constituents[200:len(sp500constituents)]
     problems = []
     dataImportProblems = []
     for series in sp500constituents[485:len(sp500constituents)]:
         print series 
         try:  
             data = DataReader(series, "yahoo", start, end)
             data = data.reset_index()
         except:
             print "Can't read {}".format(series)
             dataImportProblems.append(series)
             continue
         con = sqlite3.connect("/home/phcostello/Documents/Data/FinanceData.sqlite")
         try:
             psql.write_frame( data, series, con)
             con.commit()
         except:
             print "Problems with {}".format(series)
             problems.append(series)
         finally:
             con.close()
     
     # changing tables to have date formats so the RODBC driver recognizes them
     # Should check that this is occurring above.
     con = sqlite3.connect("/home/phcostello/Documents/Data/FinanceData.sqlite")
     for tb in sp500constituents:
         if psql.has_table(tb, con):
             sqltxt = "SELECT * FROM {}".format(tb)
             #print sqltxt
             data = psql.read_frame(sqltxt, con)
             sqlDropTxt = 'DROP TABLE "main"."{}"'.format(tb)
             #print sqlDropTxt
             psql.execute(sqlDropTxt, con)
             con.commit()
             psql.write_frame( data, tb, con)
             con.commit()
     
     con.close()
Example 42
    def test_tquery(self):
        frame = tm.makeTimeDataFrame()
        sql.write_frame(frame, name="test_table", con=self.db)
        result = sql.tquery("select A from test_table", self.db)
        expected = frame.A
        result = Series(result, frame.index)
        tm.assert_series_equal(result, expected)

        try:
            sys.stdout = StringIO()
            self.assertRaises(sqlite3.OperationalError, sql.tquery, "select * from blah", con=self.db)

            self.assertRaises(sqlite3.OperationalError, sql.tquery, "select * from blah", con=self.db, retry=True)
        finally:
            sys.stdout = sys.__stdout__
Example 43
    def test_onecolumn_of_integer(self):
        '''
        GH 3628
        a column_of_integers dataframe should transfer well to sql
        '''
        mono_df = DataFrame([1, 2], columns=['c0'])
        sql.write_frame(mono_df, con=self.db, name='mono_df')
        # computing the sum via sql
        con_x = self.db
        the_sum = sum(my_c0[0] for my_c0 in con_x.execute("select * from mono_df"))
        # it should not fail, and gives 3 (GH 3628)
        self.assertEqual(the_sum, 3)

        result = sql.read_frame("select * from mono_df", con_x)
        tm.assert_frame_equal(result, mono_df)
Example 44
 def addSeriesToUpdateList(self, filename, newType = False, newSource=False):
     
     logging.basicConfig(level = logging.DEBUG)
     self.connect()
     
     #TODO maybe add these as data tables to update
     #validation lists
     existing_types = set(self.seriesList['Type'])
     existing_sources = set(self.seriesList['Source'])
     existing_series = set(self.seriesList['SeriesName'])
     
     
     ''' Adds series to be updated and checks info is ok'''
     thisSeriesList = pd.read_csv(filename)
     
     #Check correct colnames
     if not(self.seriesList.columns == list(thisSeriesList.columns)).all():
         raise ValueError('Columns (names) in import file are incorrect')
     
     #Strip whitespace in table values
     thisSeriesList = thisSeriesList.applymap(lambda x : x.strip() )
     
     #Convert Start and End to Datetime
     thisSeriesList[['StartRange', 'EndRange']] = thisSeriesList[['StartRange', 'EndRange']].applymap(pd.to_datetime)
     
     #Append to SeriesList
     
     for row in thisSeriesList.iterrows():
         
         row = row[1]  # iterrows yields (index, Series) tuples; keep the Series
         
         #Check type is allowable
         if (row['Type'] not in existing_types) and not newType:
             logging.error('Series {0} has type {1} not in existing types'.format(row['SeriesName'],row['Type']))
             continue
         if row['Source'] not in existing_sources and not newSource:
             logging.error('Series {0} has source {1} not in existing sources'.format(row['SeriesName'],row['Source']))
             continue
         if row['SeriesName'] in existing_series:
             logging.error('Series {0} is already in existing SeriesNames'.format(row['SeriesName']))
             continue
             
         
         #if passes all checks then write to db
         logging.info('Wrote {} to SeriesList'.format(row['SeriesName']))
         row = pd.DataFrame(row).transpose()
         psql.write_frame(row, 'SeriesList', self.con, if_exists='append')
         self.con.commit()
Example 45
    def test_uquery(self):
        frame = tm.makeTimeDataFrame()
        sql.write_frame(frame, name='test_table', con=self.db)
        stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)'
        self.assertEqual(sql.uquery(stmt, con=self.db), 1)

        try:
            sys.stdout = StringIO()

            self.assertRaises(sqlite3.OperationalError, sql.tquery,
                              'insert into blah values (1)', con=self.db)

            self.assertRaises(sqlite3.OperationalError, sql.tquery,
                              'insert into blah values (1)', con=self.db,
                              retry=True)
        finally:
            sys.stdout = sys.__stdout__
Example 46
    def test_tquery(self):
        frame = tm.makeTimeDataFrame()
        sql.write_frame(frame, name='test_table', con=self.db)
        result = sql.tquery("select A from test_table", self.db)
        expected = frame.A
        result = Series(result, frame.index)
        tm.assert_series_equal(result, expected)

        try:
            sys.stdout = StringIO()
            self.assertRaises(sqlite3.OperationalError, sql.tquery,
                              'select * from blah', con=self.db)

            self.assertRaises(sqlite3.OperationalError, sql.tquery,
                              'select * from blah', con=self.db, retry=True)
        finally:
            sys.stdout = sys.__stdout__
Example 47
def processTrackEcho(track_tuple):
    track = int(track_tuple[0])
    aid = int(track_tuple[1])
    con = MySQLdb.connect(host="bigblasta.chiim1n4uxwu.eu-central-1.rds.amazonaws.com", user="******", passwd="Jo27051980", db="bigblasta")
    cursor = con.cursor()
    t0 = datetime.now()
    attribute_dict = echonest_audio(track)
    now = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
    attribute_dict['aid']=aid
    attribute_dict['date']=now
    t1 = datetime.now()
    ser = Series(attribute_dict)
    df = DataFrame(ser).T
    sql.write_frame(df, con=con, name='echonest', if_exists='append', flavor='mysql')
    t2 = datetime.now()
    dur1 = (t1-t0).total_seconds()
    dur2 = (t2-t1).total_seconds()
    print  "Echonest response: %f, MySQL: %f" %(dur1, dur2)
Example 48
def write_values_as_scraperwiki_style_sql(base_dir):
    TABLE_NAME = "value"
    values = get_values_as_dataframe()
    values.replace(to_replace=[float('inf')], value=['na'], inplace=True)
    values['dsID'] = 'fts'
    values['is_number'] = 1
    values['source'] = ''
    values = values.rename(columns={'indicator': 'indID', 'year': 'period'})
    values = values[[
        'dsID', 'region', 'indID', 'period', 'value', 'is_number', 'source'
    ]]

    filename = os.path.join(base_dir, 'ocha.db')
    sqlite_db = sqlite3.connect(filename)
    sqlite_db.execute("drop table if exists {};".format(TABLE_NAME))
    #values = values.reset_index()
    sql.write_frame(values, TABLE_NAME, sqlite_db)
    print values
Example 49
	def save_settle_data(self, product, start=None):
		"""Like it says. If the datafile already exists it will bring it up to date, otherwise it will 
		begin from 'start' which defaults to 6 months."""
		self.product = product
		self.reset(product, self._today)
		
		fsettle = self.DATA + 'settle/' + product.lower() + '.sql'
		conn = sqlite3.connect(fsettle, detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
		cur = conn.cursor()
		
		try:
			cur.execute('SELECT timestamp FROM options AS "[timestamp]" GROUP BY timestamp ' \
				'ORDER BY timestamp')
			rows = cur.fetchall()
		except:
			rows = []
		
		if not rows:
			if start is None:
				start = self._today + relativedelta(months=-6)
		else:
			start = business_day(rows[-1][0], 1, self.cal.holidays)
			
		while start < self._today:
			if not start in rows:
				print "Saving settle data for " + str(start)
				try:
					futf, optf = self.snap_by_delta(product, start)
					del futf['ticker']
					del futf['last_trade']
					optf['timestamp'] = start
					futf['timestamp'] = start
					optf.reset_index(inplace=True)
					futf.reset_index(inplace=True)
					sql.write_frame(optf, name='options', con=conn, if_exists='append') 
					sql.write_frame(futf, name='futures', con=conn, if_exists='append')
				except Exception as e:
					print e
	
			start = business_day(start, 1, self.cal.holidays)
			
		conn.close()
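The PARSE_DECLTYPES|PARSE_COLNAMES flags passed to sqlite3.connect above are what let the settle data round-trip as Python datetimes. A minimal sketch of that mechanism, separate from the trading code:

# Sketch: how detect_types converts declared timestamp columns back to datetime.
import datetime
import sqlite3

conn = sqlite3.connect(':memory:',
                       detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
conn.execute('CREATE TABLE options (ts timestamp)')
conn.execute('INSERT INTO options VALUES (?)',
             (datetime.datetime(2013, 1, 2, 9, 30),))
row = conn.execute('SELECT ts FROM options').fetchone()
print(type(row[0]))  # datetime.datetime, recovered via the declared column type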
Example 50
    def _check_roundtrip(self, frame):
        sql.write_frame(frame, name='test_table', con=self.db)
        result = sql.read_frame("select * from test_table", self.db)

        # HACK!
        result.index = frame.index

        expected = frame
        tm.assert_frame_equal(result, expected)

        frame['txt'] = ['a'] * len(frame)
        frame2 = frame.copy()
        frame2['Idx'] = Index(range(len(frame2))) + 10
        sql.write_frame(frame2, name='test_table2', con=self.db)
        result = sql.read_frame("select * from test_table2",
                                self.db,
                                index_col='Idx')
        expected = frame.copy()
        expected.index = Index(range(len(frame2))) + 10
        tm.assert_frame_equal(expected, result)
Example 51
	def snap(self, product):
		"""Snap live data and bring settlement data up to date"""
		self.save_settle_data(product)	# settlement data up-to-date. N.B. also calls reset()
		
		dnow = datetime.datetime.now()
		flive = self.DATA + 'live/' + product.lower() + '.sql'
		conn = sqlite3.connect(flive)
		cur = conn.cursor()
		
		futf, optf = self.snap_by_delta(product, dnow)
		del futf['ticker']
		del futf['last_trade']
		optf['timestamp'] = dnow
		futf['timestamp'] = dnow
		optf.reset_index(inplace=True)
		futf.reset_index(inplace=True)
		sql.write_frame(optf, name='options', con=conn, if_exists='append') 
		sql.write_frame(futf, name='futures', con=conn, if_exists='append')
			
		conn.close()
Example 52
    def save2db(self, items='all'):
        """Save project info to database
        
        Args:
            items: 'jobs','results' and 'runsummary' respectively save jobs,
                results or run summary to the database; 'all' saves everything            
        
        """

        db_abspath = os.path.join(self.resultsdir_abspath, self.db_name)
        cnx = sqlite3.connect(db_abspath)

        if items == 'all' or items == 'jobs':
            sql.write_frame(self.jobs_df,
                            name='Jobs',
                            con=cnx,
                            if_exists='append')
        if items == 'all' or items == 'results':
            sql.write_frame(self.results_df,
                            name='Results',
                            con=cnx,
                            if_exists='append')
        if items == 'all' or items == 'runsummary':
            sql.write_frame(self.runsum_df,
                            name='RunSummary',
                            con=cnx,
                            if_exists='append')

        cnx.close()
Example 53
    def _check_roundtrip(self, frame):
        sql.write_frame(frame, name='test_table', con=self.db)
        result = sql.read_frame("select * from test_table", self.db)

        # HACK! Change this once indexes are handled properly.
        result.index = frame.index

        expected = frame
        tm.assert_frame_equal(result, expected)

        frame['txt'] = ['a'] * len(frame)
        frame2 = frame.copy()
        frame2['Idx'] = Index(lrange(len(frame2))) + 10
        sql.write_frame(frame2, name='test_table2', con=self.db)
        result = sql.read_frame("select * from test_table2", self.db,
                                index_col='Idx')
        expected = frame.copy()
        expected.index = Index(lrange(len(frame2))) + 10
        expected.index.name = 'Idx'
        print(expected.index.names)
        print(result.index.names)
        tm.assert_frame_equal(expected, result)