def run(self, dao: Dao, h1):
    """Prune stale entries from every ``twitter_friends_sum`` item.

    For each scanned item, every ``rank[cd]['ds']`` list is reduced to the
    records whose ``'d'`` value is ``>=`` the cutoff string built from
    "now minus 15 days" (formatted ``%Y/%m/%d_00:00:00``). A ``cd`` whose
    list becomes empty is removed from ``rank``. Items that still have at
    least one ``cd`` are written back with ``put_item``; items whose
    ``rank`` ended up empty are removed with ``delete_item_silent``.

    Args:
        dao: Data-access object providing ``table(name)``.
        h1: Callable invoked with a heading string at each stage.
    """
    h1('twitter_friends_sum取得')
    tbl_tfs = dao.table('twitter_friends_sum')
    tfs_list = tbl_tfs.full_scan()

    cutoff_day = DateTimeUtil.now() - timedelta(days=15)
    # The comparison below is a plain ``>=`` against this formatted string.
    # presumably r['d'] is stored in the same format -- TODO confirm
    two_week_ago = cutoff_day.strftime('%Y/%m/%d_00:00:00')

    h1('集計開始')
    for tfs in tfs_list:
        rank = tfs['rank']
        # Iterate over a snapshot of the keys: entries are deleted from
        # ``rank`` inside the loop, and deleting from a dict while iterating
        # it directly raises "RuntimeError: dictionary changed size during
        # iteration".
        for cd in list(rank):
            recent = [r for r in rank[cd]['ds'] if r['d'] >= two_week_ago]
            if recent:
                rank[cd]['ds'] = recent
            else:
                print("tfs['rank'][cd] = 空. cd={}".format(cd))
                del rank[cd]

        if rank:
            tbl_tfs.put_item(Item=tfs)
        else:
            print('空。 uid={}'.format(tfs['uid']))
            tbl_tfs.delete_item_silent({"S": tfs['uid']})
    h1('終了')
def run(self, dao, req):
    """Rebuild ``tweet_summary`` on every ``twitter_friends`` document.

    For each friend, the matching ``stock_report_pre`` documents are scanned
    for that user's tweets created within the last 15 days. The tweets are
    grouped per ``ccode``, the groups are ordered newest-first by their
    latest tweet, and then bucketed by the calendar day of that latest
    tweet. The resulting list is stored on the friend document.

    Args:
        dao: Data-access object; used here only for the final ``update_one``.
        req: Not used by this method.

    NOTE(review): reads go through ``self.dao`` while the write uses the
    ``dao`` argument -- confirm both refer to the same store.
    """
    Log.info('twitter_friendsを全件取得。')
    friends = self.dao.table('twitter_friends').find({})

    Log.info('インフルエンサー毎に過去15日のツイートを集計する。')
    for friend in friends:
        friend_id = friend['id_str']
        reports = self.dao.table('stock_report_pre').find(
            {'tweets.user_id': friend_id})
        since = DateTimeUtil.now() - timedelta(days=15)

        # One group per ccode, holding this friend's recent tweets for it.
        groups = []
        for report in reports:
            own_recent = [
                tw for tw in report['tweets']
                if tw['user_id'] == friend_id and tw['created_at'] >= since
            ]
            if not own_recent:
                continue

            group = next(
                (g for g in groups if g['ccode'] == report['ccode']), None)
            if group is None:
                # First time this ccode shows up for the friend.
                group = {'name': '', 'last_update_date': since, 'tweet': []}
                group['ccode'] = report['ccode']
                brand = self.dao.table('stock_brands').find_one(
                    {'ccode': report['ccode']})
                if brand:
                    group['name'] = brand['name']
                groups.append(group)

            newest = max([tw['created_at'] for tw in own_recent])
            group['last_update_date'] = max(group['last_update_date'], newest)
            group['tweet'].extend(own_recent)

        # Newest group first; the sort is stable, so ties keep their order.
        groups.sort(key=lambda g: g['last_update_date'], reverse=True)

        # Bucket the groups by the day of their most recent tweet.
        summary = []
        for group in groups:
            latest = group['last_update_date']
            group['is_market_time'] = DateTimeUtil.is_market_time(latest)
            day_label = latest.strftime('%Y/%m/%d')
            bucket = next(
                (b for b in summary if b['str_day'] == day_label), None)
            if bucket is None:
                summary.append(
                    {'str_day': day_label, 'day': latest, 'tweet': [group]})
            else:
                bucket['tweet'].append(group)

        friend['tweet_summary'] = summary
        dao.table('twitter_friends').update_one(
            {'id_str': friend_id}, {'$set': friend})
        Log.debug('登録: {}', friend['screen_name'])
    Log.info('終了')
def run(self, dao: Dao, h1):
    """Build ``twitter_friends_sum`` rows from the tweets in ``stock_report``.

    Every scanned report contributes its tweets (looked up in the ``tweet``
    table via ``find_batch``) whose ``created_at`` is strictly greater than
    the cutoff string built from "now minus 15 days". The tweets are
    collected per user and per ``ccode`` as ``{'nm': ..., 'ds': [...]}``,
    where each ``ds`` element is ``{'d': created_at, 't': id_str}``. One row
    ``{'uid': ..., 'rank': ...}`` per user is then inserted in a single call.

    Args:
        dao: Data-access object providing ``table(name)``.
        h1: Callable invoked with a heading string at each stage.
    """
    h1('stock_report取得')
    repos = dao.table('stock_report').full_scan()

    cutoff = DateTimeUtil.now() - timedelta(days=15)
    # presumably created_at is a string in this same format -- TODO confirm
    two_week_ago = cutoff.strftime('%Y/%m/%d_00:00:00')

    # user_id -> ccode -> {'nm': stock name, 'ds': [tweet refs]}
    friend_tweet = {}

    h1('ツイート集計開始')
    for repo in repos:
        cd = repo['ccode']
        fetched = dao.table("tweet").find_batch(repo['tweets'])
        recent = sorted(
            (t for t in fetched if t['created_at'] > two_week_ago),
            key=lambda t: t['created_at'],
            reverse=True,
        )
        for t in recent:
            u_id = t['user_id']
            stock_name = repo['name']
            entry = {'d': t['created_at'], 't': t['id_str']}
            per_user = friend_tweet.setdefault(u_id, {})
            if cd in per_user:
                per_user[cd]['ds'].append(entry)
            else:
                per_user[cd] = {'nm': stock_name, 'ds': [entry]}

    ret_list = [
        {'uid': uid, 'rank': ranks} for uid, ranks in friend_tweet.items()
    ]
    dao.table('twitter_friends_sum').insert(ret_list)
    # NOTE(review): looks like leftover debug output -- confirm before removing.
    print('hoge')
def day_key_now(self):
    """Return ``DateTimeUtil.strf_ymd_st`` applied to ``DateTimeUtil.now()``."""
    current = DateTimeUtil.now()
    return DateTimeUtil.strf_ymd_st(current)
def run(self, dao: Dao, h1):
    """Scrape recent EDINET reports and store per-brand shareholder data.

    Fetches the report search results between ``DateTimeUtil.yesterday()``
    and ``DateTimeUtil.now()``, downloads each report's HTML, and extracts
    the ``outstanding_share``, ``holder_rate`` and ``holders`` values from
    the split tables. The company name is then resolved to a ``ccode``
    through ``stock_identify``; on a unique match the data is written to
    ``stock_edinet``, otherwise the name is recorded in ``err_value`` under
    the key ``Job013_edi_name``.

    Args:
        dao: Data-access object providing ``table(name)``.
        h1: Callable invoked with a heading string at each stage.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    text; confirm the nesting of the final ``put_item`` / ``else`` branch
    against the original file.
    """
    edinet = Edinet()
    start_day = DateTimeUtil.yesterday()
    end_day = DateTimeUtil.now()
    # Debug aid: override the window with fixed dates, e.g.
    # start_day = datetime(2018, 5, 19)
    # end_day = datetime(2018, 5, 20)
    h1('EDINETの書類検索から指定期間の有価証券報告書の検索結果を取得する。')
    search_rows = edinet.get_report_search_results(start_day, end_day)
    # Process the search results in the opposite order to how they arrived.
    search_rows.reverse()
    # Debug aid: filter search_rows on r['company_name'] here to process a
    # single company only.
    h1('検索結果から有価証券報告書を取得し、銘柄毎スクレイピング開始。')
    dao_identify = dao.table('stock_identify')
    dao_brand = dao.table('stock_brands')
    # NOTE(review): not_brands is never read in this method.
    not_brands = []
    for search_row in search_rows:
        data_html = edinet.get_report_html(search_row['syorui_kanri_no'])
        shorui_mei = search_row['syorui_mei']
        Log.info('html取得完了 : {} : {}'.format(
            search_row['company_name'], shorui_mei))
        # Skip documents whose HTML does not contain the marker text at all.
        if '大株主' not in data_html:
            Log.warn('HTMLに大株主が無い。別資料かも。 company_name : {}'.format(
                search_row['company_name']))
            continue
        table_list = edinet.report_html_to_split_table_list(
            data_html, shorui_mei)
        Log.info('HTML -> table分割 : {}'.format(search_row['company_name']))
        dat = {}
        for t in table_list:
            # Only the first table with this title is used; later ones are
            # ignored once 'outstanding_share' has been set.
            if t['title'] == '発行済株式' and 'outstanding_share' not in dat:
                dat['outstanding_share'] = edinet.outstanding_share(
                    t, search_row)
                Log.debug('** 発行済株式 : {}'.format(
                    search_row['company_name']))
            # For the next two titles a later table overwrites an earlier one.
            if t['title'] == '所有者別状況':
                dat['holder_rate'] = edinet.holder_rate_status(
                    t, search_row)
                Log.debug('** 所有者別状況 : {}'.format(
                    search_row['company_name']))
            if t['title'] == '大株主の状況':
                dat['holders'] = edinet.major_shareholders(t, search_row)
                Log.debug('** 大株主の状況 : {}'.format(
                    search_row['company_name']))
        # Nothing was extracted from this report.
        if not dat:
            continue
        name = search_row['company_name'].replace('(株)', '').replace(' ', '')
        # NOTE(review): as written, the replace above already removes every
        # space, so these two substitutions cannot match -- confirm whether a
        # different (e.g. full-width) space character was intended.
        name = re.sub(r'^ ', '', name)
        name = re.sub(r' $', '', name)
        # Look the name up as-is first, then fall back to the zenhan.z2h
        # conversion of it, then to that converted name with spaces removed.
        identify = dao_identify.find_query({'nm': name})
        if not identify:
            name_s = zenhan.z2h(name, mode=1)
            identify = dao_identify.find_query({'nm': name_s})
            if not identify:
                name_s = name_s.replace(' ', '')
                identify = dao_identify.find_query({'nm': name_s})
        # Proceed only when the name resolves to exactly one identity.
        if identify and len(identify) == 1:
            # NOTE(review): brand is subscripted without a None check --
            # confirm find_by_key always returns an item for a known ccode.
            brand = dao_brand.find_by_key(identify[0]['ccode'])
            Log.info('stock_brands証券コード取得。 ccode, name : {}, {}'.format(
                brand['ccode'], name))
            dat['ccode'] = brand['ccode']
            dat['name'] = brand['name']
            dat['date'] = DateTimeUtil.strf_ymd_st(
                DateTimeUtil.date_from_japanese_era(
                    search_row['submition_day'], short=True))
            # If any of the three sections is missing from this report, fill
            # it from the record already stored for the same ccode.
            if empty(dat, 'outstanding_share') or empty(
                    dat, 'holder_rate') or empty(dat, 'holders'):
                edi = dao.table('stock_edinet').find_by_key(dat['ccode'])
                if edi:
                    if empty(dat, 'outstanding_share') and not empty(
                            edi, 'outstanding_share'):
                        dat['outstanding_share'] = edi['outstanding_share']
                    if empty(dat, 'holder_rate') and not empty(
                            edi, 'holder_rate'):
                        dat['holder_rate'] = edi['holder_rate']
                    if empty(dat, 'holders') and not empty(edi, 'holders'):
                        dat['holders'] = edi['holders']
            h1('stock_edinetに登録')
            dao.table('stock_edinet').put_item(Item=dat)
        else:
            # Record the unresolved (or ambiguous) company name.
            dao.table('err_value').put_item(
                Item={
                    'key': 'Job013_edi_name',
                    'd': DateTimeUtil.str_now(),
                    'val': name
                })
    h1('終了')