Beispiel #1
0
def get_company_stock(code='sh600000',
                      date1=datetime.date(2017, 1, 1),  # for backward;
                      date2=datetime.date(2016, 1, 1)):
    """Crawling the stock price and other information by tushare."""
    """Storing the data in a sqlite database."""
    try:
        code_tushare = code[2:]
        # print('changing')
        time_format = settings.get_time_format()
        startdate = date1.strftime(time_format)
        enddate = date2.strftime(time_format)
        data = ts.get_hist_data(code_tushare, enddate, startdate)
        if data is not None:
            data.insert(0, 'code', code)
            conn = sqlite3.connect(settings.get_sqlite_path())
            cur = conn.cursor()
            cur.execute(''' SELECT name FROM sqlite_master WHERE name='Stock'
                 ''')
            exists = cur.fetchone()
            # print(exists)
            if exists is not None:
                cur.execute(''' SELECT code FROM Stock WHERE code = ?''',(code,))
                exists = cur.fetchone()
            # print(exists)
            if exists is None:
                data.to_sql('Stock', conn, if_exists='append')
            conn.commit()
            print('company(code: {}) stock crawling finished'.format(code))
        else:
            print('Not get stock information for {}'.format(code))
            sn.store_error_text('no stock information:  ' + code)
    except Exception as e:
        print(e,code)
        sn.store_error_text('stock loading error:  ' + code)
def company_list_load():
    """"Company list load."""
    conn = sqlite3.connect(settings.get_sqlite_path())
    cur = conn.cursor()

    fname1, fname2 = settings.get_exchange_path()

    fh1 = open(fname1)
    for line in fh1:
        pieces = line.split(',')
        cur.execute('SELECT name FROM Companies WHERE code = ?',
                    ('sz' + pieces[0], ))
        row = cur.fetchone()
        if row is None:
            cur.execute(
                ''' INSERT INTO Companies (code, name)
                            VALUES (?, ?)
                        ''', ('sz' + pieces[0], pieces[1]))
    conn.commit()

    fh2 = open(fname2)
    for line in fh2:
        pieces = line.split(',')
        cur.execute('SELECT name FROM Companies WHERE code = ?',
                    ('sh' + pieces[0], ))
        row = cur.fetchone()
        if row is None:
            cur.execute(
                ''' INSERT INTO Companies (code, name)
                            VALUES (?, ?)
                        ''', ('sh' + pieces[0], pieces[1]))
    conn.commit()
    print('company list loading end')
def store_article_sqlite(article):
    # print('starting store')
    # print(article.content)
    if article.content is not "None":
        conn = sqlite3.connect(settings.get_sqlite_path(), timeout=5)
        cur = conn.cursor()
        # source table
        cur.execute(''' SELECT id FROM Source WHERE source = ?''', (article.source,))
        source_id = cur.fetchone()
        if source_id is None:
            cur.execute(''' INSERT OR IGNORE INTO Source (source) VALUES(?)''', (article.source,))  

        # News
        cur.execute(''' SELECT id FROM Source WHERE source = ?''', (article.source,))
        source_id = cur.fetchone()[0]
        cur.execute(''' SELECT id FROM News WHERE news_id = ?''', (article.newsid,))
        news_table_id = cur.fetchone()
        if news_table_id is None:
            cur.execute(''' INSERT INTO News (news_id, source_id, pubdate, url, title, content)
                VALUES(?, ?, ?, ?, ?, ?)''', (article.newsid, source_id, article.pubdate, article.url, article.title, article.content,))

        # Company_News
        cur.execute(''' SELECT id FROM News WHERE news_id = ?''', (article.newsid,))
        news_table_id = cur.fetchone()
        cur.execute(''' SELECT company_id FROM Company_News WHERE news_table_id = ?''', (news_table_id[0],))
        related_companies = cur.fetchall()
        related_company = []
        for com in related_companies:
            related_company.append(com[0])

        if (related_company is None) or (article.companyid not in related_company):
            cur.execute(''' INSERT INTO Company_News (news_table_id, company_id)
                    VALUES(?, ?)''', (news_table_id[0], article.companyid,))
        conn.commit()
    print('{} stored over'.format(article.title))
def store_sqlite_value():
    """Store the value in Data Base."""
    conn = sqlite3.connect(settings.get_sqlite_path())
    cur = conn.cursor()
    fname = settings.get_value_path()
    f = open(fname)
    for line in f:
        raw = line.split(',')
        cur.execute(
            ''' UPDATE News SET sentiment = ?
                        WHERE id = ?''', (raw[1], raw[0]))
    conn.commit()
    print("stored over")
Beispiel #5
0
def number_of_news():
    """Calculate the number of useful news for one company."""
    conn = sqlite3.connect(settings.get_sqlite_path())
    cur = conn.cursor()
    fname = settings.get_number_path()
    f = open(fname, 'w')
    f.write('code,numberOfNews,numberOfNoContent,numberOfNone\n')

    companies = []
    # count the number of news of all companies
    cur.execute(''' SELECT code FROM Companies''')
    raws = cur.fetchall()
    for raw in raws:
        companies.append(raw[0])

    total_number = 0
    total_nocontent = 0
    total_none = 0
    for company in companies:
        cur.execute(
            ''' SELECT news_table_id FROM Company_News WHERE company_id = ?''',
            (company, ))
        newsids = cur.fetchall()
        number = 0
        none = 0
        nocontent = 0
        for newsid in newsids:
            # print(newsid)
            cur.execute(''' SELECT content FROM News WHERE id = ?''',
                        (newsid[0], ))
            text = cur.fetchone()[0]
            if text == 'None':
                none += 1
            elif text == 'NO CONTENT':
                nocontent += 1
            else:
                number += 1
        f.write(company + ',{},{},{}'.format(number, nocontent, none))
        f.write('\n')
        total_number += number
        total_nocontent += nocontent
        total_none += none
    f.write('total,{},{},{}'.format(total_number, total_nocontent, total_none))
    f.close()
    print('end of counting')
def get_value_list(start_table_id=1, end_table_id=100):
    """Return the list of sentimental value of news."""
    # loading the wordsbase to analysis
    wordsbase = wbl.get_wordsbase()
    conn = sqlite3.connect(settings.get_sqlite_path())
    cur = conn.cursor()
    value = {}
    for id in range(start_table_id, end_table_id + 1):
        cur.execute(
            ''' SELECT content FROM News
                        WHERE id = ?''', (id, ))
        raw = cur.fetchone()
        # print(raw)
        # newsid = raw[0]
        text = str(raw[0])
        if text != "None" and text != "NO CONTENT":
            value[id] = round(get_sentimental_value(text, wordsbase), 2)
    return value
def reset_sqlite():
    """Set the tables for storing news."""
    conn = sqlite3.connect(settings.get_sqlite_path())
    cur = conn.cursor()
    cur.executescript('''
        DROP TABLE IF EXISTS Companies;
        DROP TABLE IF EXISTS News;
        DROP TABLE IF EXISTS Source;
        DROP TABLE IF EXISTS Company_News;
        DROP TABLE IF EXISTS Stock;
        ''')
    cur.executescript('''
                CREATE TABLE  Companies (
                code CHAR(8) PRIMARY KEY UNIQUE,
                name TEXT UNIQUE);

                CREATE TABLE  News (
                id INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
                news_id TEXT UNIQUE,
                source_id INTEGER,
                pubdate TEXT,
                time_window INTEGER,
                url TEXT UNIQUE,
                title TEXT,
                content TEXT,
                sentiment REAL);

                CREATE TABLE  Source (
                id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
                source TEXT NOT NULL UNIQUE NOT NULL);

                CREATE TABLE  Company_News(
                id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
                news_table_id INTEGER,
                company_id CHAR(8))
                ''')
    conn.commit()
    print("sqlite reset")
def pro_one_code(code='sz000001',
                 return_col=[
                     'code', 'p_var', 'mean_return', 'num_news', 'sum_abs_sent'
                 ]):
    conn = sqlite3.connect(settings.get_sqlite_path())
    # process the stock
    sql = ''' SELECT  date,close,volume,p_change  FROM Stock 
              WHERE code like {}'''.format('\'' + code + '\'')
    raw = pd.read_sql(sql=sql, con=conn, index_col='date')
    # raw = raw[columns]
    raw.index = pd.to_datetime(raw.index)
    draw = pd.DataFrame()
    draw['p_var'] = raw['p_change'].resample('W', label='left').var()
    draw['mean_return'] = raw['p_change'].resample('W', label='left').mean()
    draw = draw.assign(code=code)

    # process the news
    sql_news = '''  SELECT pubdate,source_id,sentiment FROM News LEFT OUTER JOIN Company_News 
                    ON News.id=Company_News.news_table_id
                    WHERE Company_News.company_id like {}'''.format('\'' +
                                                                    code +
                                                                    '\'')
    raw_news = pd.read_sql(sql=sql_news, con=conn, index_col='pubdate')
    raw_news.index = pd.to_datetime(raw_news.index)
    raw_news['abs_sentiment'] = raw_news.sentiment.abs()
    draw_news = pd.DataFrame()
    draw_news['sum_abs_sent'] = raw_news['abs_sentiment'].resample(
        'W', label='left').sum()
    draw_news['num_news'] = raw_news['sentiment'].resample(
        'W', label='left').count()
    # fill the NaN with 0 for news
    draw = draw.join(draw_news)
    draw[['sum_abs_sent', 'num_news']] = draw[['sum_abs_sent',
                                               'num_news']].fillna(0)
    # drop the NaN when the stock is not exchanged
    draw = draw.dropna()
    return draw[return_col]
Beispiel #9
0
# coding = utf-8
import data_load as dl
import settings
import sqlite3

dl.reset_sqlite()
dl.company_list_load()

conn = sqlite3.connect(settings.get_sqlite_path())
cur = conn.cursor()
cur.execute(''' SELECT code FROM Companies''')
raws = cur.fetchall()
companies = []

for raw in raws:
    companies.append(raw[0])
print(companies)

dl.news_load(companies)

dl.stock_load(companies)