Esempio n. 1
0
    def __init__(self, codeid=None, first100=False, *args, **kwargs):
        """Populate ``start_urls`` with one quotes URL per selected stock.

        Args:
            codeid: Optional stock code. ``'0000001'`` is special-cased and
                skips the database entirely; any other non-None value
                restricts the company query to that single code.
            first100: When truthy, restrict the company query to the codes
                returned by ``dd_pct().have_dd(30)``. A non-None ``codeid``
                overrides this filter.
            *args: Forwarded to the parent constructor.
            **kwargs: Forwarded to the parent constructor.
        """
        super(Quotes_itemSpider, self).__init__(*args, **kwargs)
        self.select_data()

        if codeid == '0000001':  # Shanghai Composite Index
            codes = [['0000001', 'szzs']]
        else:
            company = T.listed_company
            columns = [company.c.codeid, company.c.shsz]
            query = T.select(columns)
            if first100:
                dd_stats = dd_pct()
                dd_stats.select_all(common.wfunc.before_day(80))
                code_100 = dd_stats.have_dd(30)
                print(code_100)
                query = T.select(columns).where(
                    company.c.codeid.in_(code_100))

            # An explicit code takes precedence over the first100 filter.
            if codeid is not None:
                query = T.select(columns).where(company.c.codeid == codeid)
            codes = T.conn.execute(query).fetchall()

        for row in codes:
            # Adjust the code format via builde_code before building the URL.
            full_code = self.builde_code(row[0], row[1])
            url = self.url_module % (str(full_code), self.startdata,
                                     self.enddata)
            self.start_urls.append(url)
        print('共需查询:' + str(len(self.start_urls)) + '支股票行情.......')
Esempio n. 2
0
 def select_quotes(self, id, getpd=True):
     """Look up the stored quotes for one code.

     Args:
         id: Value matched against ``quotes_item.code_id``.
         getpd: When true (default) the JSON in the ``quotes`` column is
             decoded into a DataFrame and passed through ``self.to_math``.
             When false the first raw row (``quotes``, ``update_at``) is
             returned instead.

     Returns:
         The converted DataFrame, or the first raw result row.

     Raises:
         IndexError: If no row matches ``id``.
     """
     table = T.quotes_item
     matches_code = table.c.code_id == id

     if getpd:
         query = T.select([table.c.quotes]).where(matches_code)
         first_row = T.conn.execute(query).fetchall()[0]
         # The quotes column holds JSON; decode it before building the frame.
         frame = self.pd.DataFrame(json.loads(first_row[0]))
         numeric_columns = ['gao', 'di', 'shou', 'kai', 'before',
                            'zd_range', 'zd_money', 'liang']
         return self.to_math(df=frame, numeric=numeric_columns)

     # Raw lookup: quotes plus their update_at value.
     query = T.select([table.c.quotes, table.c.update_at]).where(matches_code)
     return T.conn.execute(query).fetchall()[0]
Esempio n. 3
0
 def select_cp_atd(self, Tb, a_type):
     """Query ``put_time`` and ``cp_attitude`` for this code's articles.

     Args:
         Tb: Table object exposing ``put_time``, ``cp_attitude``,
             ``code_id`` and ``article_type`` columns.
         a_type: Value matched against ``article_type``.

     Returns:
         Whatever ``self.select_atd`` returns for the built query with
         ``columns=['cp_attitude']``.
     """
     cols = Tb.c
     query = T.select([cols.put_time, cols.cp_attitude])
     # Successive where() calls are combined with AND.
     query = query.where(cols.code_id == self.code_id)
     query = query.where(cols.put_time > self.start)
     query = query.where(cols.article_type == a_type)
     return self.select_atd(query, columns=['cp_attitude'])
Esempio n. 4
0
 def select_plates(self):
     """Load plate ids, company codes and regions into instance containers.

     Fills ``self.plates`` (plate ids whose ``father_id`` is non-zero),
     ``self.companys`` (every company code as a string) and
     ``self.regions`` (region name -> region id).
     """
     plate_query = T.select([T.listed_plate.c.plateid]).where(
         T.listed_plate.c.father_id != 0)
     for row in T.conn.execute(plate_query).fetchall():
         plate = row[0]
         # Ids shorter than five characters get exactly one leading zero
         # (and become strings); longer ids are stored unchanged.
         if len(str(plate)) < 5:
             plate = '0' + str(plate)
         self.plates.append(plate)

     company_query = T.select([T.listed_company.c.codeid])
     for row in T.conn.execute(company_query).fetchall():
         self.companys.append(str(row[0]))

     region_query = T.select([T.listed_region.c.id, T.listed_region.c.name])
     for row in T.conn.execute(region_query).fetchall():
         region_id, region_name = row[0], row[1]
         self.regions[region_name] = region_id
Esempio n. 5
0
 def select_cp_atd(self, Tb, a_type):
     """Query ``put_time`` and ``cp_attitude`` for matching articles.

     When ``self.code_id`` is ``'1000001'`` the query is not tied to a
     single code: every row whose ``code_id`` is not NULL qualifies.
     Otherwise only rows for ``self.code_id`` are selected.

     Args:
         Tb: Table object exposing ``put_time``, ``cp_attitude``,
             ``code_id`` and ``article_type`` columns.
         a_type: Value matched against ``article_type``.

     Returns:
         Whatever ``self.select_atd`` returns for the built query with
         ``columns=['cp_attitude']``.
     """
     # cp_attitude query for articles
     if str(self.code_id) == '1000001':
         # ``Tb.c.code_id is not None`` is a Python identity test that is
         # always True, so it never produced SQL. Use the column operator
         # to emit a real "code_id IS NOT NULL" clause.
         code_filter = Tb.c.code_id.isnot(None)
     else:
         code_filter = Tb.c.code_id == self.code_id
     s = T.select([Tb.c.put_time, Tb.c.cp_attitude]).where(code_filter).where(
         Tb.c.put_time > self.start).where(Tb.c.article_type == a_type)
     return self.select_atd(s, columns=['cp_attitude'])
Esempio n. 6
0
def make_keywords(txt_path):
    """Dump every ``ch_dict`` row to a UTF-8 text file, one word per line.

    Each line has the form ``"<word> <rate as int> <nature>"``.

    Args:
        txt_path: Path of the file to (over)write.
    """
    query = T.select([T.ch_dict.c.word, T.ch_dict.c.rate, T.ch_dict.c.nature])
    result = T.conn.execute(query)
    # The context manager closes the file even if a row fails to serialise.
    with open(txt_path, 'w', encoding='utf-8') as out:
        for row in result.fetchall():
            out.write(row['word'] + ' ' + str(int(row['rate'])) + ' ' +
                      row['nature'] + '\n')
    def __init__(self, *args, **kwargs):
        """Initialise the spider and record every URL already in ``topic``.

        Args:
            *args: Forwarded to the parent constructor.
            **kwargs: Forwarded to the parent constructor.
        """
        # Call the parent class constructor.
        super(TopicVreport, self).__init__(*args, **kwargs)

        # Query the addresses that already exist.
        known_urls = T.conn.execute(T.select([T.topic.c.url])).fetchall()
        for row in known_urls:
            self.old_link.append(row[0])
Esempio n. 8
0
def thecompany():
    """Rewrite company names with the values produced by ``update_ab``.

    Reads every (name, id) pair from ``listed_company``, hands the result
    to ``update_ab`` and writes each returned name back to the row with the
    matching id, printing a confirmation when exactly one row changed.
    """
    company = T.listed_company
    selected = T.conn.execute(T.select([company.c.name, company.c.id]))
    for entry in update_ab(selected):
        new_name, row_id = entry[0], entry[1]
        statement = company.update().where(
            company.c.id == row_id).values(name=new_name)
        outcome = T.conn.execute(statement)
        if outcome.rowcount == 1:
            print(new_name, '修改成功...')
Esempio n. 9
0
def thechdict():
    """Rewrite ``ch_dict`` words with the values produced by ``update_ab``.

    Only rows whose ``rate`` equals ``'100000.00'`` are read. Each entry
    returned by ``update_ab`` is written back to the row with the matching
    id, printing a confirmation when exactly one row changed.
    """
    words = T.ch_dict
    query = T.select([words.c.word, words.c.id]).where(
        words.c.rate == '100000.00')
    selected = T.conn.execute(query)
    for entry in update_ab(selected):
        new_word, row_id = entry[0], entry[1]
        statement = words.update().where(
            words.c.id == row_id).values(word=new_word)
        outcome = T.conn.execute(statement)
        if outcome.rowcount == 1:
            print(new_word, '修改成功...')
Esempio n. 10
0
def add_dict():
    """Insert company names that are missing from ``ch_dict``.

    Words already present in ``ch_dict`` with ``rate == '100000.00'`` are
    printed and then used as the "known" set. Every ``listed_company`` name
    not in that set is inserted with ``rate='100000'`` and
    ``nature='nts'``. Nothing is executed when there is nothing to add.
    """
    known_query = T.select([T.ch_dict.c.word]).where(
        T.ch_dict.c.rate == '100000.00')
    ch_dict = [row[0] for row in T.conn.execute(known_query).fetchall()]
    print(ch_dict)
    # A set gives O(1) membership tests instead of rescanning the list
    # once per company name.
    known_words = set(ch_dict)

    company_query = T.select([T.listed_company.c.name])
    new_dict = [
        {'word': row[0], 'rate': '100000', 'nature': 'nts'}
        for row in T.conn.execute(company_query).fetchall()
        if row[0] not in known_words
    ]

    # Executing an insert with an empty parameter list is not a no-op
    # (it is run as a parameterless INSERT), so bail out explicitly.
    if not new_dict:
        return

    result = T.conn.execute(T.ch_dict.insert(), new_dict)
    if result.rowcount > 0:
        print(result.rowcount, '新添加')
Esempio n. 11
0
 def start_requests(self):
     """Issue the first request, carrying the selected plate ids in ``meta``.

     Plate ids are taken from ``listed_plate`` rows with ``plateid > 4000``
     and ``father_id > 0``, converted to strings and printed.

     Returns:
         A single-element list holding the ``Request`` for
         ``self.start_urls[0]`` with ``self.parse`` as callback.
     """
     plate = T.listed_plate
     query = T.select([plate.c.plateid])
     query = query.where(plate.c.plateid > 4000)
     query = query.where(plate.c.father_id > 0)
     metas = [str(row[0]) for row in T.conn.execute(query).fetchall()]
     print(metas)

     # Request the page. NOTE(review): the original comment said the cookie
     # is stored in meta, but the code stores the plate id list there.
     first_request = Request(self.start_urls[0],
                             meta={'plates': metas},
                             callback=self.parse)
     return [first_request]
Esempio n. 12
0
 def open_spider(self, spider):
     """Preload the identifiers already stored for the opening spider.

     Depending on ``spider.name`` one of ``self.ddtj_onlyid``,
     ``self.zh_list`` or ``self.change_list`` is set to the full list of
     values currently in the corresponding table column.

     Args:
         spider: The spider being opened; only ``spider.name`` is read.
     """
     wfunc.e('spider ' + spider.name + ' --->opend')

     def column_values(column):
         # Return every stored value of a single column as a plain list.
         rows = T.conn.execute(T.select([column])).fetchall()
         return [row[0] for row in rows]

     if spider.name in ['ddtj', 'ddtj_history']:
         self.ddtj_onlyid = column_values(T.ddtj.c.only_id)
     if spider.name == 'xueqiu_zuhe':
         self.zh_list = column_values(T.xq_zuhe.c.zh_symbol)
     if spider.name == 'zuhe_change':
         self.change_list = column_values(T.zuhe_change.c.change_id)
Esempio n. 13
0
 def __init__(self, *args, **kwargs):
     """Populate ``start_urls`` for companies whose ``codeid`` is below 10.

     Args:
         *args: Forwarded to the parent constructor.
         **kwargs: Forwarded to the parent constructor.
     """
     super(QuotesSpider, self).__init__(*args, **kwargs)
     self.select_data()

     query = T.select([T.listed_company.c.codeid]).where(
         T.listed_company.c.codeid < 10)
     for row in T.conn.execute(query).fetchall():
         code = str(row[0])
         # Adjust the code length: short codes are left-padded with zeros
         # to six characters and then prefixed with '1'.
         if len(code) < 6:
             code = '1' + code.rjust(6, '0')
         url = self.url_module % (str(code), self.startdata, self.enddata)
         self.start_urls.append(url)
     print('共需查询:', len(self.start_urls), '支股票行情.......')
Esempio n. 14
0
 def select_change(self):
     """Combine this code's quotes with its daily ``zuhe_change`` totals.

     Rows of ``zuhe_change`` for ``self.code_id`` are grouped per day to
     obtain the sum and the count of ``change_status``. Both are
     left-joined onto the quotes frame (missing days filled with 0), a
     running total ``cumsum`` is added, and the result is handed to
     ``self.web_data``.

     Returns:
         ``''`` when no change rows exist, otherwise the value returned by
         ``self.web_data`` for the columns ``cumsum``, ``shou`` and
         ``change_count``.
     """
     # NOTE(review): the original referenced a bare ``code_id`` that is not
     # defined in this method; ``self.code_id`` is assumed, matching the
     # sibling query methods -- confirm against the class.
     code_id = self.code_id
     quotes_data = self.select_quotes(code_id)
     query = T.select([T.zuhe_change]).where(
         T.zuhe_change.c.code_id == code_id)
     # ``rowcount`` is not dependable for SELECT statements, so fetch the
     # rows and test the list itself.
     rows = T.conn.execute(query).fetchall()
     if not rows:
         return ''

     data_arr = []
     for row in rows:
         # Database rows are converted to plain dicts.
         record = dict(row)
         # presumably updated_at is a millisecond timestamp -- TODO confirm
         record['updated_at'] = wfunc.the_day(
             int(int(record['updated_at']) / 1000))
         data_arr.append(record)

     pandas_change = self.pd.DataFrame(data_arr)
     pandas_change['datatime'] = self.pd.to_datetime(
         pandas_change['updated_at'], format='%Y-%m-%d')

     # Dict-style renaming in SeriesGroupBy.agg was removed in pandas 1.0;
     # aggregate directly and rename the count column instead.
     per_day = pandas_change.groupby('datatime',
                                     as_index=False)['change_status']
     pd_sum = per_day.sum()
     pd_count = per_day.count().rename(
         columns={'change_status': 'change_count'})

     quotes_data['datatime'] = self.pd.to_datetime(quotes_data['datatime'],
                                                   format='%Y-%m-%d')
     pd_mean = self.pd.merge(quotes_data,
                             pd_sum,
                             on=['datatime'],
                             how='left').fillna(0)
     pd_mean = self.pd.merge(pd_mean, pd_count, on=['datatime'],
                             how='left').fillna(0)
     pd_mean = pd_mean.sort_values(by='datatime', ascending=True)
     # Running total of change_status over the sorted days.
     pd_mean['cumsum'] = pd_mean['change_status'].cumsum()
     return self.web_data(pd_mean,
                          'datatime',
                          columns=['cumsum', 'shou', 'change_count'])
Esempio n. 15
0
    def process_item(self, item, spider):
        """Persist a scraped item in the table that matches its type.

        News and topic articles are stripped of HTML, de-duplicated on
        ``only_id``, inserted and then passed to the matching analyser.
        Codes, plates, notices and Q&A items are inserted only when no
        matching row exists; quotes are always inserted. For the
        ``upplates`` spider a code item updates ``plate_id`` instead.

        Args:
            item: The scraped item.
            spider: The spider that produced it; only ``spider.name`` is
                read.

        Returns:
            Always ``None``.
        """
        # NOTE: a filter dropping items whose put_time is below
        # self.min_time was sketched here originally but is disabled.
        self.add_nums += 1

        # News articles
        if isinstance(item, NewsItem):
            self._store_article(item, T.news, 2, self.news_analyse)
        # Topic analysis articles
        elif isinstance(item, TopicItem):
            self._store_article(item, T.topic, 1, self.topic_analyse)
        # Stock codes
        elif isinstance(item, CodesItem):
            company = T.listed_company
            if spider.name in ['codes', 'newcodes']:
                self._insert_if_new(item, company,
                                    company.c.codeid == item['codeid'])
            elif spider.name == 'upplates':
                update = company.update().where(
                    company.c.codeid == item['codeid']).values(
                        plate_id=item['plate_id'])
                T.conn.execute(update)
        # Stock quotes
        elif isinstance(item, QuotesItem):
            T.conn.execute(T.quotes.insert(), dict(item))
        # Stock plates
        elif isinstance(item, PlatesItem):
            self._insert_if_new(item, T.listed_plate,
                                T.listed_plate.c.plateid == item['plateid'])
        # Company notices
        elif isinstance(item, NoticesItem):
            notice = T.company_notice
            self._insert_if_new(item, notice,
                                notice.c.title == item['title'],
                                notice.c.code_id == item['code_id'])
        # Questions and answers
        elif isinstance(item, QandaItem):
            self._insert_if_new(item, T.qanda,
                                T.qanda.c.only_id == item['only_id'])

        return None

    def _row_exists(self, table, *conditions):
        """Return True when ``table`` has a row matching every condition."""
        query = T.select([table])
        for condition in conditions:
            query = query.where(condition)
        # rowcount is driver-dependent for SELECT; fetching the first row
        # is a portable existence test.
        return T.conn.execute(query).first() is not None

    def _insert_if_new(self, item, table, *conditions):
        """Insert ``item`` into ``table`` unless a matching row exists."""
        if self._row_exists(table, *conditions):
            return
        T.conn.execute(table.insert(), dict(item))

    def _store_article(self, item, table, article_type, analyser):
        """Clean, de-duplicate, insert and analyse one article item."""
        # Remove HTML tags from the body.
        item['body'] = wfunc.delete_html(item['body'])
        if self._row_exists(table, table.c.only_id == item['only_id']):
            return
        inserted = T.conn.execute(table.insert(), dict(item))
        # Semantic analysis: the item itself is annotated and analysed.
        item['article_id'] = inserted.inserted_primary_key
        item['article_type'] = article_type
        self.add_attitude_relation(analyser.run(item))