def __init__(self, el, _between, _and, _after, _before):
    """Extract date attributes (when / notBefore / notAfter) from element
    *el* and build a human-readable ``when_formatted`` string.

    _between/_and/_after/_before are localized connector words used when
    only a date range (notBefore/notAfter) is available.
    """
    self.when = el.get('when')
    try:
        self.when_ymd = to_ymd(self.when)
        self.when_formatted = format_date(self.when)
    except Exception:
        # Fixed: the old bare except left self.when_ymd undefined on a
        # parse failure, so later attribute access raised AttributeError.
        self.when_ymd = None
        self.when_formatted = self.when
    self.notBefore = el.get('notBefore')
    self.notBefore_ymd = to_ymd(self.notBefore)
    self.notBefore_formatted = format_date(self.notBefore)
    self.notAfter = el.get('notAfter')
    self.notAfter_ymd = to_ymd(self.notAfter)
    self.notAfter_formatted = format_date(self.notAfter)
    self.date_text = el.find('date') is not None and el.find('date').text or ''
    self.place = el.find('place') is not None and el.find('place').text or ''
    self.place_id = el.get('place_id')
    self.type = el.get('type')
    start = self.notBefore_formatted
    stop = self.notAfter_formatted
    # Without explicit date text, synthesize "between X and Y" /
    # "after X" / "before Y" from the range bounds.
    if not self.date_text:
        if start and stop:
            self.when_formatted = "%(_between)s %(start)s %(_and)s %(stop)s" % locals()
        elif start:
            self.when_formatted = "%(_after)s %(start)s" % locals()
        elif stop:
            self.when_formatted = "%(_before)s %(stop)s" % locals()
        else:
            self.when_formatted = ''
def copy_pool_dragon(self, from_date, end_date):
    """Copy the dragon-pool record of *from_date* forward to *end_date*.

    Any non-empty concept list already stored for *end_date* overrides the
    copied one; the merged record is upserted under *end_date*.
    Raises if no record exists for *from_date*.
    """
    from_date = common.format_date(from_date, "%Y%m%d")
    from_results = self.mongodb.concepts.dragon_pool.find(
        {"date": from_date})
    if from_results.count() == 0:
        raise BaseException(
            "No dragon pool record for date:%s, failed to copy" % from_date)
    # Yesterday's record (the one being copied forward)
    from_result = from_results[0]
    # Today's record already in the database, if any
    end_date = common.format_date(end_date, "%Y%m%d")
    end_results = self.mongodb.concepts.dragon_pool.find(
        {"date": end_date})
    end_result = {}
    if end_results.count() > 0:
        end_result = end_results[0]
        del end_result['_id']
    # Merge the two; end_date's non-empty entries override the copied ones
    del from_result['_id']
    del from_result['date']
    for concept in end_result:
        if len(end_result[concept]) > 0:
            from_result[concept] = end_result[concept]
    from_result['date'] = end_date
    self.mongodb.concepts.dragon_pool.update({"date": end_date},
                                             from_result,
                                             upsert=True)
def add_video(episode_tree, episode = False):
    """Scrape schema.org TVEpisode/VideoObject items out of *episode_tree*
    (a BeautifulSoup tree) and return a list of playable-item tuples.

    When *episode* is truthy, only items carrying an 'episode' class are
    kept. Returns None if anything unexpected blows up (outer bare except
    is a deliberate best-effort guard).
    """
    try:
        episodes = []
        episode_menu = episode_tree.find_all(itemtype = 'http://schema.org/TVEpisode')
        if not episode_menu:
            # Some pages mark items as generic VideoObject instead.
            episode_menu = episode_tree.find_all(itemtype = 'http://schema.org/VideoObject')
        for episode_item in episode_menu:
            if episode == False or episode_item.find(class_ = 'episode'):
                episode_name = episode_item.find('meta', itemprop = 'name')['content']
                episode_plot = episode_item.find('meta', itemprop = 'description')['content']
                url = episode_item.find('meta', itemprop = 'url')['content']
                # Thumbnail: try the meta variants, fall back to the <img> tag.
                try:
                    episode_thumb = episode_item.find('meta', itemprop = 'image')['content'].split('?')[0]
                except:
                    try:
                        episode_thumb = episode_item.find('meta', itemprop = 'thumbnailUrl')['content'].split('?')[0]
                    except:
                        episode_thumb = episode_item.find('img')['src'].split('?')[0]
                # Airdate: uploadDate as-is, else datePublished reformatted
                # (full then abbreviated month name), else -1 sentinel.
                try:
                    episode_airdate = episode_item.find('meta', itemprop = 'uploadDate')['content']
                except:
                    try:
                        episode_airdate = episode_item.find('meta', itemprop = 'datePublished')['content']
                        try:
                            episode_airdate = common.format_date(episode_airdate, '%B %d, %Y')
                        except:
                            episode_airdate = common.format_date(episode_airdate, '%b %d, %Y')
                    except:
                        episode_airdate = -1
                # Duration: ISO-8601-ish 'TxxMxxS' string, else bare seconds.
                try:
                    episode_duration = episode_item.find('meta', itemprop = 'duration')['content']
                    try:
                        duration_mins, duration_seconds = re.compile('([0-9]*)M([0-9]*)S').findall(episode_duration)[0]
                        episode_duration_seconds = int(duration_mins) * 60 + int(duration_seconds)
                    except:
                        episode_duration_seconds = int(episode_duration.replace('S', '').replace('T',''))
                except:
                    episode_duration_seconds = -1
                # Season/episode numbers from 'Season N - Episode M | ...' text.
                try:
                    episode_meta = episode_item.find('div', class_ = 'video_meta').text.split('|')[0]
                    season_number = int(episode_meta.split('-')[0].replace('Season', '').strip())
                    episode_number = int(episode_meta.split('-')[1].replace('Episode', '').strip()[1:])
                except:
                    season_number = -1
                    episode_number = -1
                # Build the plugin callback URL.
                u = sys.argv[0]
                u += '?url="' + urllib.quote_plus(url) + '"'
                u += '&mode="' + SITE + '"'
                u += '&sitemode="play_video"'
                infoLabels={ 'title' : episode_name, 'durationinseconds' : episode_duration_seconds, 'season' : season_number, 'episode' : episode_number, 'plot' : episode_plot, 'premiered' : episode_airdate }
                episodes.append((u, episode_name, episode_thumb, infoLabels, 'list_qualities', False, 'Full Episode'))
        return episodes
    except:
        # NOTE(review): swallows all errors and implicitly returns None —
        # callers must tolerate a None result.
        pass
def updateCacheFromCsv(self, start_date, end_date): start_date = common.format_date(start_date, "%Y%m%d") end_date = common.format_date(end_date, "%Y%m%d") results = self.mongodb.stock.ZDT_by_date.find({"date":{"$gte":start_date, "$lte":end_date}}) # 获取指定日期之间的交易日期 for result in results: date = result['date'] print "updating L1/L2 from csv of date:%s ..." %date self.updateAnotationL2CsvByDate(date) self.updateCacheCsvByDate(date)
def oneDate_freshStocks(self, cdate, predate, mongodb):
    """For trading day *cdate* (previous trading day *predate*), split
    today's limit-up list into sub-new (recent IPO) stocks and ordinary
    limit-ups, and store the split on the day's ZDT_by_date record.

    This variant re-checks stocks that dropped off the limit-up list: a
    zero intraday high means a suspension, not a real break.
    """
    predate = common.format_date(predate, "%Y%m%d")
    cdate = common.format_date(cdate, "%Y%m%d")
    yesResult = mongodb.stock.ZDT_by_date.find_one({"date": predate})
    yesFreshStocks = []
    ## Today's candidate sub-new one-board stocks: yesterday's freshStocks
    ## (IPOs still locked limit-up) plus yesterday's Add_newStocks entries
    if yesResult is not None and yesResult.has_key("freshStocks"):
        yesFreshStocks.extend(yesResult['freshStocks'].split("_"))
    if yesResult is not None and yesResult.has_key("Add_newStocks"):
        yesFreshStocks.extend(yesResult['Add_newStocks'].keys())
    ## Add the stocks that IPO'd today
    todayNewadds = []
    todayResult = mongodb.stock.ZDT_by_date.find_one({"date": cdate})
    if todayResult.has_key("Add_newStocks"):
        yesFreshStocks.extend(todayResult['Add_newStocks'].keys())
        todayNewadds.extend(todayResult['Add_newStocks'].keys())
    ## De-duplicate
    yesFreshStocks = list(set(yesFreshStocks))
    ## Compare against today's limit-up list
    todayZTs = todayResult['ZT_stocks'].split("_")  ## today's limit-up stocks
    freshStocks = [x for x in yesFreshStocks if x in todayZTs]  ## sub-new stocks still limit-up today
    # Sub-new stocks missing from todayZTs may just be suspended or
    # otherwise abnormal; keep those instead of dropping them.
    recheckStocks = [x for x in yesFreshStocks if x not in todayZTs]
    for stocks in recheckStocks:
        if len(stocks) == 0:
            continue
        recheckFrame = common.get_daily_frame(stocks, cdate, cdate)
        # A zero intraday high means the stock did not trade today.
        if recheckFrame.loc[0, 'HIGHEST_PRICE'] == 0:
            print "will add %s to freshlist because abnormal situation" % stocks
            freshStocks.append(stocks)
    freshStocks.extend(todayNewadds)
    openedFreshStocks = [
        x for x in yesFreshStocks if x not in todayZTs and x not in todayNewadds
    ]  ## sub-new stocks that opened (broke limit-up) today
    actulZtStocks = [x for x in todayZTs if x not in yesFreshStocks]  ## today's organic limit-ups, sub-new excluded
    freshStocks = list(set(freshStocks))
    freshStocks = "_".join(freshStocks)
    openedFreshStocks = "_".join(openedFreshStocks)
    actulZtStocks = "_".join(actulZtStocks)
    dicts = {
        "freshStocks": freshStocks,
        "actulZtStocks": actulZtStocks,
        "openedFreshStocks": openedFreshStocks
    }
    mongodb.stock.ZDT_by_date.update({"date": cdate}, {"$set": dicts}, True)
    print "update 次新/涨停去除次新 for %s" % cdate
def concept_pipeline_html(self, default_period=30, csv_dir=u'D:/Money/modeResee/复盘'):
    """Render per-concept/per-day heat maps (html) counting how many
    stocks of each character type (ZT/HD/DT/meat/hole) appeared over the
    last *default_period* trading days, read from daily review csv files.
    """
    concepts = common.get_focused_concepts()  # heat-map y-axis
    back_day = common.get_lastN_date(self.day, default_period)
    day_list = common.getDate(back_day, self.day)  # heat-map x-axis
    # One flat (concept x day) grid per character type.
    zt_concepts_group = [0] * (len(day_list)*len(concepts))
    hd_concepts_group = [0] * (len(day_list)*len(concepts))
    dt_concepts_group = [0] * (len(day_list)*len(concepts))
    meat_concepts_group = [0] * (len(day_list)*len(concepts))
    hole_concepts_group = [0] * (len(day_list)*len(concepts))
    group_list = [zt_concepts_group, hd_concepts_group, dt_concepts_group, meat_concepts_group, hole_concepts_group]
    character_list = ['ZT', 'HD', 'DT', 'meat', 'hole']
    # character_list = ['ZT']
    group_list = group_list[:len(character_list)]
    for i in range(0, len(day_list)):
        tarday = day_list[i]
        print tarday
        tarday = common.format_date(tarday, "%Y%m%d")
        dframe = pd.read_csv(os.path.join(csv_dir, "%s/daydayup.csv" %tarday), encoding='gbk')
        # Drop rows with an empty stock field.
        dframe['len_status'] = dframe['stock'].apply(lambda x: len(str(x)))
        dframe = dframe[dframe.len_status > 0]
        for j in range(0, len(concepts)):
            concept = concepts[j]
            # Fold aliases of the same concept together.
            if concept in common.close_concept_dict.keys():
                concept_near_list = common.close_concept_dict[concept]
            elif concept in common.close_concept_value_dict.keys():
                real_concept = common.close_concept_value_dict[concept]
                concept_near_list = common.close_concept_dict[real_concept]
            else:
                concept_near_list = [concept]
            cframe = dframe[dframe.group.isin(concept_near_list)]
            for k in range(0, len(character_list)):
                tmp_frame = cframe[cframe.type == character_list[k]]
                stock_name = list(set(list(np.unique(tmp_frame.name.values))))
                num = len(stock_name)
                stock_name = "_".join(stock_name)
                # Cell [j, i]: concept j on day i, with count and names.
                position = j*len(day_list) + i
                group_list[k][position] = [j, i, num, stock_name]
    dframe_list = []
    day_list = [common.format_date(x, "%m%d") for x in day_list]
    for i in range(0, len(group_list)):
        dframe_list.append(common.get_dataframe_option3(group_list[i], "_".join(concepts),
                                                        "_".join(day_list), character_list[i]))
    type_list = [3] * len(dframe_list)
    title_list = character_list
    save_day = common.format_date(self.day, "%Y%m%d")
    common.get_html_curve(dframe_list, u"concepts_flow", html_types=type_list, title_list=title_list,
                          save_dir=os.path.join(u"D:/Money/modeResee/复盘/%s"%save_day, ""))
def baseInfo(self, tframe):
    """Collect the limit-up / limit-down / 10%-high / 10%-low stock code
    lists from *tframe* and return
    [summary_dict, ZT_list, DT_list, HD_list, LD_list]."""
    def _codes(column):
        # Positive values are stock ids; left-pad to six digits and
        # de-duplicate.
        return list(set(str(int(v)).zfill(6) for v in tframe[column].values if v > 0))

    ZT_list = _codes('up10')
    DT_list = _codes('dn10')
    HD_list = _codes('high10')
    LD_list = _codes('low10')
    dicts = {
        'ZT_num': len(ZT_list),
        'DT_num': len(DT_list),
        'ZT_stocks': "_".join(ZT_list),
        'DT_stocks': "_".join(DT_list),
        'HD_stocks': "_".join(HD_list),
        'LD_stocks': "_".join(LD_list),
        'date': common.format_date(self.cdate, "%Y%m%d"),
    }
    return [dicts, ZT_list, DT_list, HD_list, LD_list]
def crawl_fp(date, out_dir = u"D:/Money/modeResee/复盘/网络复盘/凤凰"): try: date = common.format_date(date, "%Y%m%d") year = date[:4] month = date[4:6] day = date[-2:] if int(year) < 2016 and int(month) <7 and int(day) < 27: # 20150627 之前 url = "http://finance.ifeng.com/TMP/special/ztbfp%s%s/index.shtml"%(month, day) else: # 20150628之后 url = "http://finance.ifeng.com/TMP/special/ztbfp%s/index.shtml"%date[2:] if int(date) > 20151126: url = url.replace("TMP", "mrztbfp") else: url = url.replace("TMP", 'news') while True: r = rs.get(url) if r.status_code != 200: time.sleep(5) else: break root = etree.HTML(r.content) fig_address = root.xpath('//div[@class="pic01"]//img/@src')[0] # 下载图片 while True: r = rs.get(fig_address) if r.status_code == 200: break else: time.sleep(5) with open(os.path.join(out_dir, "%s.jpg"%date), 'wb') as fHandler: fHandler.write(r.content) return True except Exception, e: print e return False
def updateNewHighlist(self):
    """Compare each stock's latest close (sina realtime) with its stored
    historical high: record near-high stocks (> 95% of the high) on
    today's ZDT record, and bump the stored high for stocks at or above
    their old high."""
    results = self.mongodb.stock.HighestPrice.find({})
    dframe = pd.DataFrame()
    count = 0
    for result in results:
        dframe.loc[count,'stcid'] = result['stcid']
        dframe.loc[count,'Hprice'] = result['Hprice']
        count += 1
    stocklist = dframe['stcid'].values
    dframe.set_index('stcid',inplace=True)
    # Realtime close prices, aligned to the stored highs by stcid.
    zframe = common.get_sina_data(stocklist)
    zframe = zframe[['stcid','close']]
    zframe.set_index('stcid',inplace=True)
    dframe = pd.concat([dframe,zframe],axis=1)
    # NOTE(review): reset_index is passed len(dframe) as its level
    # argument; presumably only the plain index reset is wanted — confirm.
    dframe.reset_index(len(dframe),inplace=True)
    dframe.columns = ['stcid','Hprice','close']
    dframe['hrate'] = dframe['close'].astype(np.float64)/dframe['Hprice'].astype(np.float64)
    dmframe = dframe[dframe.hrate > 0.95]  # within 5% of the historical high
    duframe = dframe[dframe.hrate >= 1]    # at or above the historical high
    # Sub-new (recent IPO) stocks are excluded from the near-high list.
    freshstocks = self.mongodb.stock.ZDT_by_date.find_one({"date":self.yestoday})['freshStocks'].split("_")
    self.nearNewHigh.update([x for x in dmframe.stcid.values if x not in freshstocks])
    self.mongodb.stock.ZDT_by_date.update({"date":self.today},{"$set":{"nearNewHigh":"_".join(self.nearNewHigh),"date":self.today}},True)
    # Persist new highs back into HighestPrice.
    for i in duframe.index.values:
        stcid = duframe.loc[i,'stcid']
        price = duframe.loc[i,'close']
        self.mongodb.stock.HighestPrice.update({"stcid":stcid},{"$set":{"Hdate":common.format_date(self.today,"%Y/%m/%d"),"Hprice":price}},True)
        print "update highest for %s @ %s"%(stcid,price)
def episodes(episode_url = common.args.url):
    """Return a list of clip tuples parsed from the JSON feed at
    *episode_url* (fetched with the AUTH header)."""
    collected = []
    raw = connection.getURL(episode_url, header = AUTH)
    for item in simplejson.loads(raw)['videos']:
        airdate = common.format_date(item['airDate'],'%Y-%m-%d', '%d.%m.%Y')
        video_url = item['ios_video_url']
        duration = int(item['duration'])
        plot = item['description']
        name = item['name']
        # Optional fields fall back to sentinel values.
        try:
            season = item['season']
        except:
            season = -1
        try:
            number = item['episode']
        except:
            number = -1
        try:
            thumb = item['img_url'].replace(' ', '%20')
        except:
            thumb = None
        # Plugin callback URL.
        link = sys.argv[0]
        link += '?url="' + urllib.quote_plus(video_url) + '"'
        link += '&mode="' + SITE + '"'
        link += '&sitemode="play_video"'
        info = { 'title' : name, 'durationinseconds' : duration, 'season' : season, 'episode' : number, 'plot' : plot, 'premiered' : airdate }
        collected.append((link, name, thumb, info, 'list_qualities', False, 'Clip'))
    return collected
def updateFpList(self):
    """Collect stocks that resumed trading (复牌) within the last three
    trading days after a long (>= 20 day) suspension, remember their
    direction sign and target price in self.fpDict, and write the joined
    id list onto today's ZDT_by_date record."""
    recent_days = [
        common.format_date(d, "%Y-%m-%d")
        for d in (self.today,
                  common.get_lastN_date(self.today, 1),
                  common.get_lastN_date(self.today, 2))
    ]
    query = {"fp_time": {"$in": recent_days}, "dp_ratio": {"$exists": True}}
    for record in self.mongodb.stock.FP.find(query):
        # Only long suspensions are interesting.
        if record['delta_days'] < 20:
            continue
        sign = "+" if record['dp_ratio'] >= 0 else "-"
        self.fpDict[record['stcid']] = [sign, record['target_price']]
    joined = "_".join(self.fpDict.keys())
    # Upsert today's record with the resumption list.
    self.mongodb.stock.ZDT_by_date.update(
        {"date": self.today}, {"$set": {"fp_list": joined}}, True, True)
def episodes(episode_url = common.args.url):
    """Build the episode list for a show from its JSON feed.

    PREMIUM items are only included after a successful login (attempted
    lazily, once, when the first PREMIUM item is seen).
    """
    episodes = []
    episode_data = connection.getURL(episode_url)
    episode_json = simplejson.loads(episode_data)['result']
    episode_menu = episode_json['data']
    title = episode_json['title']
    valid_login = None
    for episode_item in episode_menu:
        url = BASE + episode_item['url']
        # Log in lazily, only when the first PREMIUM item shows up.
        if episode_item['status'] == 'PREMIUM' and valid_login is None:
            valid_login = login(url)
            print "login", valid_login
        if episode_item['status'] == 'AVAILABLE' or (valid_login and episode_item['status'] == 'PREMIUM'):
            videourl = episode_item['streaming_url']
            HD = False
            episode_duration = int(common.format_seconds(episode_item['duration']))
            episode_airdate = common.format_date(episode_item['airdate'], '%m/%d/%y')
            # Prefer the shorter non-empty 'label' over 'title'.
            if len(episode_item['label']) < len(episode_item['title']) and episode_item['label']:
                episode_name = episode_item['label']
            else:
                episode_name = episode_item['title']
            try:
                season_number = int(episode_item['season_number'])
            except:
                season_number = -1
            try:
                episode_number = int(episode_item['episode_number'])
            except:
                episode_number = -1
            try:
                episode_thumb = episode_item['thumb']['large']
            except:
                episode_thumb = None
            episode_plot = episode_item['description']
            # The rating rides at the end of the plot in parentheses.
            try:
                episode_mpaa = re.compile('\((.*)\)$').findall(episode_plot)[0]
            except:
                episode_mpaa = None
            show_title = episode_item['series_title']
            episode_expires = episode_item['expiry_date']
            episode_type = episode_item['type']
            if url is not None:
                u = sys.argv[0]
                u += '?url="' + urllib.quote_plus(url) + '"'
                u += '&mode="' + SITE + '"'
                u += '&sitemode="play_video"'
                infoLabels={ 'title' : episode_name, 'durationinseconds' : episode_duration, 'season' : season_number, 'episode' : episode_number, 'plot' : episode_plot, 'premiered' : episode_airdate, 'mpaa' : episode_mpaa, 'TVShowTitle': show_title}
                infoLabels = common.enrich_infolabels(infoLabels, episode_expires)
                episodes.append((u, episode_name, episode_thumb, infoLabels, 'list_qualities', HD, episode_type))
            else:
                pass
    return episodes
def get_dataframe_option5(start_date, end_date, target_concept, stockid, tframe):
    """Return (frame, [concept_name, stock_name]) combining the
    concept-level distribution frame with one stock's daily rate series.

    The expensive per-concept frames are memoized in the module-level
    concept_data_frame_list dict, keyed by concept.
    """
    global concept_data_frame_list
    if target_concept not in concept_data_frame_list.keys():
        print "will get all_concept_stock_dailyframe for %s" % target_concept
        group_frame, all_concept_stock_dailyframe = get_group_frame(
            start_date, end_date, target_concept, tframe)
        concept_data_frame_list[target_concept] = [
            group_frame, all_concept_stock_dailyframe
        ]
    else:
        [group_frame, all_concept_stock_dailyframe
         ] = concept_data_frame_list[target_concept]
    # Single-stock daily series for *stockid*.
    signal_frame = all_concept_stock_dailyframe[
        all_concept_stock_dailyframe.TICKER_SYMBOL == stockid]
    stock_name = signal_frame.iloc[0, :]['SEC_SHORT_NAME']
    signal_frame = signal_frame[['TRADE_DATE', 'rate']]
    signal_frame.rename(columns={
        "TRADE_DATE": 'date',
        'rate': "ratio"
    }, inplace=True)
    signal_frame['date'] = signal_frame['date'].apply(
        lambda x: common.format_date(x, "%Y-%m-%d"))
    signal_frame['count'] = 1
    # data_flag 2 marks the single-stock series (the concept frame uses 1).
    signal_frame['data_flag'] = 2
    outframe = pd.concat([signal_frame, group_frame], axis=0)
    name_list = [target_concept, stock_name]
    return outframe, name_list
def __init__(self):
    """Connect to the local mongo, refresh the new-high list, then block
    until the market opens (09:30) before building today's monitored
    lists (sub-new / watched / resumption stocks)."""
    mongoUrl = "localhost"
    self.freshList = set()
    self.forcusList = set()
    self.nearNewHigh = set()
    self.stockList = set()
    self.fpDict = {}
    self.mongodb = pymongo.MongoClient(mongoUrl)
    self.today = datetime.date.today().strftime("%Y%m%d")
    # self.today = "20170519"
    self.yestoday = common.get_last_date(self.today)
    self.yestoday = common.format_date(self.yestoday, "%Y%m%d")
    # print self.yestoday
    self.updateNewHighlist()
    # Busy-wait (10 s poll) until 09:30 market open.
    while (True):
        ttime = time.localtime()
        thour = ttime.tm_hour
        tmin = ttime.tm_min
        if (thour > 9) or (thour == 9 and tmin >= 30):
            break
        time.sleep(10)
    self.updatefreshlist()  # sub-new (recent IPO) stocks
    self.updateForcusList()  # watched stocks
    self.updateFpList()  # resumption (复牌) stocks
def add_fullepisodes_southpark(episode_tree):
    """Add the full episodes of the season selected in the plugin
    arguments (sys.argv[2]); outer bare except keeps the directory
    listing alive on any scrape failure."""
    try:
        # The season number arrives quoted inside the plugin query string.
        season = urllib.unquote(sys.argv[2].split('&')[2].split('=')[1].replace('%22','')).split(' ')[1]
        episode_menu = episode_tree.find_all('article', class_ = 'thumb')
        for episode_item in episode_menu:
            episode_name = episode_item.find(class_ = 'title')
            if episode_name is None:
                continue
            url = episode_item.a['href']
            # URLs embed season/episode as .../sXXeYY...
            try:
                season_number, episode_number = re.compile('s([0-9]{2})e([0-9]{2})').findall(url)[0]
            except:
                episode_number = -1
                season_number = -1
            # Only keep items from the requested season.
            if int(season) != int(season_number):
                continue
            episode_name = episode_name.string.strip()
            episode_plot = episode_item.find('p', class_ = 'episode').string.strip()
            episode_airdate = episode_item.find(class_ = 'air-date').string.strip()
            episode_airdate = common.format_date(episode_airdate , '%m.%d.%Y', '%d.%m.%Y')
            # The thumbnail is embedded in a CSS url(...) inline style.
            episode_thumb = re.match('(.*?)url\(\'(.*?)\'\)', episode_item.find('a', class_ = 'fill')['style']).group(2)
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels={ 'title' : episode_name, 'season' : season_number, 'episode' : episode_number, 'plot' : episode_plot, 'premiered' : episode_airdate }
            common.add_video(u, episode_name, episode_thumb, infoLabels = infoLabels, quality_mode = 'list_qualities')
    except:
        pass
def updatefreshlist(self): tardate = common.get_lastN_date(self.today, 120) tardate = common.format_date(tardate, "%Y%m%d") results = self.mongodb.stock.ZDT_by_date.find({ "date": { "$gte": tardate, "$lte": self.today }, "Add_newStocks": { "$exists": True } }) for result in results: tmp = result['Add_newStocks'].keys() if u'未公布' in tmp: tmp.remove(u'未公布') self.freshList.update(tmp) tmp = list(self.freshList) todayfresh = self.mongodb.stock.ZDT_by_date.find_one( {'date': self.yestoday})['freshStocks'] tmp = [x for x in self.freshList if x not in todayfresh] self.freshList = set(tmp) self.mongodb.stock.ZDT_by_date.update({"date": self.today}, { "$set": { "monitorFreshStocks": "_".join(self.freshList), "date": self.today } }, True) print "update Monitored fresh stocks finished!"
def add_fullepisodes(episode_tree, season_number = -1):
    """Add every full episode found in *episode_tree*'s episodeContainer
    divs via common.add_video; outer bare except keeps the listing alive
    on any scrape failure."""
    try:
        episode_menu = episode_tree.find_all('div', class_ = 'episodeContainer')
        for episode_item in episode_menu:
            episode_name = episode_item.find('div', class_ = 'episodeTitle').a.text
            episode_airdate = common.format_date(episode_item.find('div', class_ = 'episodeAirDate').contents[1].strip(), '%b %d, %Y', '%d.%m.%Y')
            episode_plot = episode_item.find('div', class_ = 'episodeDescription').contents[0].strip()
            episode_thumb = episode_item.find('div', class_ = 'episodeImage').img['src'].split('?')[0]
            url = episode_item.find('div', class_ = 'episodeTitle').a['href']
            # Duration text looks like '(MM:SS)'.
            try:
                episode_duration = common.format_seconds(episode_item.find('span', class_ = 'episodeDuration').text.replace(')', '').replace('(', ''))
            except:
                episode_duration = -1
            # Identifier text embeds '#<season><episode>'.
            try:
                episode_number = int(episode_item.find('div', class_ = 'episodeIdentifier').text.split('#' + season_number)[1])
            except:
                episode_number = -1
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels = { 'title' : episode_name, 'durationinseconds' : episode_duration, 'season' : season_number, 'episode' : episode_number, 'plot' : episode_plot, 'premiered' : episode_airdate }
            common.add_video(u, episode_name, episode_thumb, infoLabels = infoLabels, quality_mode = 'list_qualities')
    except:
        pass
def get_group_frame(start_date, end_date, target_concept, tframe):
    """Return (group_frame, all_concept_stock_dailyframe) for a concept:
    group_frame counts, per day, the watched stocks at each rate bucket.
    """
    # Daily price-change data for every stock related to the concept
    all_concept_stock_dailyframe = get_all_concept_stocks_daily_rate(start_date, end_date, target_concept, tframe)
    # Per day, the stocks worth watching (those with a limit-up history)
    concept_history_zt_list = get_daily_forcuse_stock(all_concept_stock_dailyframe)
    # Daily change rate for each watched stock, for the statistics below
    forcus_frame_list = []
    for trade_date in concept_history_zt_list.keys():
        stock_list = concept_history_zt_list[trade_date]
        tmp_frame = all_concept_stock_dailyframe[(all_concept_stock_dailyframe.TRADE_DATE == trade_date) & (all_concept_stock_dailyframe.TICKER_SYMBOL.isin(stock_list))]
        forcus_frame_list.append(tmp_frame)
    # Per-day change rates of the watched (previously limit-up) stocks
    forcus_frame = pd.concat(forcus_frame_list, axis=0)
    # Count of stocks at each rate bucket per day
    group_frame = forcus_frame.groupby(['TRADE_DATE', 'rate'])['TICKER_SYMBOL'].count().reset_index()
    # Convert to the column names/format the html helpers expect
    group_frame['TRADE_DATE'] = group_frame['TRADE_DATE'].apply(lambda x: common.format_date(x, '%Y-%m-%d'))
    group_frame.rename(columns={"TRADE_DATE":"date", "TICKER_SYMBOL":"count", "rate":"ratio"}, inplace=True)
    # data_flag 1 marks the concept-level series.
    group_frame['data_flag'] = 1
    return group_frame, all_concept_stock_dailyframe
def episodes(episode_url = common.args.url):
    """Add every item with videos from the JSON feed to the directory; an
    item is flagged HD when any flash rendition exceeds 2000 kbps. Ends
    by switching the skin to the episodes view."""
    episode_data = connection.getURL(episode_url)
    episode_menu = simplejson.loads(episode_data)
    for episode_item in episode_menu['items']:
        if episode_item['videos']:
            url = episode_item['guid']
            episode_name = episode_item['title']
            episode_plot = episode_item['description']
            episode_airdate = common.format_date(episode_item['airdate'], '%Y-%m-%d %H:%M:%S', '%d.%m.%Y')
            # 'length' is reported in milliseconds.
            episode_duration = int(episode_item['videos'].itervalues().next()['length']) / 1000
            # Try the artwork variants from best to worst.
            try:
                episode_thumb = episode_item['images']['kids-mezzannine-16x9']['url']
            except:
                try:
                    episode_thumb = episode_item['images']['kids-mezzannine-4x3']['url']
                except:
                    episode_thumb = episode_item['images']['mezzanine']['url']
            HD = False
            for video in episode_item['videos']['flash'].itervalues():
                try:
                    if video['bitrate'] > 2000:
                        HD = True
                except:
                    pass
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels={ 'title' : episode_name, 'durationinseconds' : episode_duration, 'plot' : episode_plot, 'premiered' : episode_airdate }
            common.add_video(u, episode_name, episode_thumb, infoLabels = infoLabels, HD = HD, quality_mode = 'select_quailty')
    common.set_view('episodes')
def add_rank_chg(dframe, chg_days=(5, 25), sort_v=5):
    """Compute, for every ticker in *dframe*, the compounded percent
    change over each look-back window in *chg_days* (trading days counted
    back from the newest TRADE_DATE), then rank tickers by the *sort_v*-
    day change (rank 1 = biggest gain).

    Returns a frame with TICKER_SYMBOL, one '<n>_ratio' column per window
    and a 'rank' column, sorted by rank.
    """
    # Fixed: use a real bool for `ascending` (the old string 'True' only
    # worked because any non-empty string is truthy) and an immutable
    # tuple default for chg_days instead of a shared mutable list.
    dframe = dframe.sort_values(by=['TRADE_DATE'], ascending=True)
    dframe = dframe[[
        'TICKER_SYMBOL', 'TRADE_DATE', 'PRE_CLOSE_PRICE', 'CLOSE_PRICE'
    ]]
    dframe['daily_ratio'] = 100 * (
        dframe['CLOSE_PRICE'] -
        dframe['PRE_CLOSE_PRICE']) / dframe['PRE_CLOSE_PRICE']
    dframe['daily_ratio'] = dframe['daily_ratio'].round(2)
    oframe = pd.DataFrame()
    max_day = max(dframe.TRADE_DATE.values)
    for chg_day in chg_days:
        # Compound the daily ratios over the last chg_day trading days.
        n_day = common.get_lastN_date(max_day, chg_day)
        n_day = common.format_date(n_day, "%Y-%m-%d")
        tmp_frame = dframe[dframe.TRADE_DATE >= n_day]
        for stockid, stockframe in tmp_frame.groupby('TICKER_SYMBOL'):
            oframe.loc[stockid, '%s_ratio' % chg_day] = 100 * (
                np.prod(1 + stockframe['daily_ratio'] / 100) - 1)
    oframe.reset_index(inplace=True)
    oframe.rename(columns={"index": "TICKER_SYMBOL"}, inplace=True)
    # Ties share the best (minimum) rank.
    oframe['rank'] = oframe['%s_ratio' % sort_v].rank(method='min',
                                                      ascending=False)
    oframe = oframe.sort_values(by=["rank"])
    return oframe
def add_fullepisodes(episode_tree, season_number = -1):
    """Add the full episodes found in *episode_tree*'s 'episode_guide'
    divs; season/episode numbers are derived from the markup or the URL
    when possible. Outer bare except keeps the listing alive on failure.
    """
    try:
        episode_menu = episode_tree.find_all('div', class_ = 'episode_guide')
        for episode_item in episode_menu:
            episode_name = common.replace_signs(episode_item.find('img')['title'])
            episode_airdate = common.format_date(episode_item.find('p', class_ = 'aired_available').contents[1].strip(), '%m/%d/%Y', '%d.%m.%Y')
            episode_plot = common.replace_signs(episode_item.find('p', class_ = False).text)
            episode_thumb = episode_item.find('img')['src'].split('?')[0]
            url = episode_item.find('div', class_ = 'thumb_image').a['href']
            # Episode number from the 'Episode <season><nn>' title, else
            # from the '-0<nn>' URL suffix, else -1 sentinel.
            try:
                episode_number = int(episode_item.find('a', class_ = 'title').contents[1].split('Episode ' + season_number)[1])
            except:
                try:
                    episode_number = int(url.split('-0')[1])
                except:
                    episode_number = -1
            # Fall back to the season encoded in the URL.
            if season_number == -1:
                season_number = int(url.split('-')[-3])
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels = { 'title' : episode_name, 'season' : season_number, 'episode' : episode_number, 'plot' : episode_plot, 'premiered' : episode_airdate }
            common.add_video(u, episode_name, episode_thumb, infoLabels = infoLabels, quality_mode = 'list_qualities')
    except:
        pass
def add_clips(episode_tree):
    """Add every clip found in the page's 'block' divs to the directory;
    the outer bare except keeps the listing alive on scrape failure."""
    try:
        for block in episode_tree.find_all('div', class_ = 'block'):
            name = common.replace_signs(block.find('h3').a.text)
            plot = common.replace_signs(block.find('p', class_ = False).text)
            thumb = block.find('img')['src'].split('?')[0]
            clip_url = block.find('div', class_ = 'thumb_area').a['href']
            # Airdate and duration are both optional in the markup.
            try:
                raw_date = block.find('div', class_ = 'details').contents[0].split(' ', 1)[1].strip()
                airdate = common.format_date(raw_date, '%B %d, %Y', '%d.%m.%Y')
            except:
                airdate = -1
            try:
                raw_len = block.find('h3').small.text.replace(')', '').replace('(', '')
                duration = common.format_seconds(raw_len)
            except:
                duration = -1
            plugin_url = sys.argv[0]
            plugin_url += '?url="' + urllib.quote_plus(clip_url) + '"'
            plugin_url += '&mode="' + SITE + '"'
            plugin_url += '&sitemode="play_video"'
            labels = { 'title' : name, 'durationinseconds' : duration, 'plot' : plot, 'premiered' : airdate }
            common.add_video(plugin_url, name, thumb, infoLabels = labels)
    except:
        pass
def add_videos_thetonightshow(url, type_, page = 1, added_episodes = None):
    """Collect Tonight Show videos of *type_* from the feed, walking pages
    recursively up to the 'maxpages' addon setting.

    added_episodes tracks ids already emitted across the recursive page
    walk so duplicates are skipped.
    """
    # Fixed: the old mutable default ([]) persisted between top-level
    # calls, so a second invocation silently dropped every episode seen
    # by the first one.
    if added_episodes is None:
        added_episodes = []
    episodes = []
    this_url = (TONIGHT_SHOW_FEED % url) + '&offset=' + str((page-1) * 10)
    root_data = connection.getURL(this_url)
    data = simplejson.loads(root_data)
    for video in data['content']:
        if video['type'] == type_:
            if type_ == 'episode':
                episode_name = video['name']
                episode_id = video['episodeNumber']
            else:
                episode_name = video['title']
                episode_id = video['id']
            if episode_id in added_episodes:
                continue
            added_episodes.append(episode_id)
            pid = video['videos'][0]['mpxPublicId']
            episode_url = SMIL % pid
            try:
                episode_plot = BeautifulSoup(video['description']['value'], 'html.parser').p.string
            except:
                episode_plot = ''
            try:
                episode_airdate = common.format_date(video['airDate'][:-6],'%Y-%m-%dT%H:%M:%S','%d.%m.%Y')
            except:
                episode_airdate = -1
            try:
                season_number = int(video['season'])
            except:
                season_number = -1
            try:
                episode_number = int(video['episodeNumber'])
            except:
                episode_number = -1
            try:
                episode_thumb = video['images'][0]['bitImageSmall']
            except:
                episode_thumb = None
            if video['type'] == 'episode':
                episode_type = 'Full Episode'
            else:
                episode_type = 'Clip'
            show_name = video['images'][0]['description']
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(episode_url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels={ 'title' : episode_name, 'season' : season_number, 'episode' : episode_number, 'plot' : episode_plot, 'premiered' : episode_airdate, 'TVShowTitle' : show_name}
            episodes.append((u, episode_name, episode_thumb, infoLabels, 'list_qualities', False, episode_type))
    # Recurse into the next feed page while under the configured limit.
    if page < int(addon.getSetting('maxpages')):
        episodes.extend(add_videos_thetonightshow(url, type_, page + 1, added_episodes))
    return episodes
def episodes(episode_url=common.args.url):
    """List episodes/clips from the feed; full episodes are only included
    once their auth-only window has expired (or the item is not a full
    episode)."""
    episodes = []
    episode_data = connection.getURL(episode_url)
    # The feed sometimes concatenates JSON objects back-to-back; patch
    # them into a valid array before parsing.
    episode_menu = simplejson.loads(episode_data.replace('}{', '},{'))['results']
    for episode_item in episode_menu:
        episode_airdate = common.format_date(episode_item['airdate'], '%Y-%m-%d', '%d.%m.%Y')
        # authEndDate is epoch milliseconds; skip full episodes that are
        # still inside their auth-only window.
        if (episode_item['authEndDate'] is None or time.time() >= long(episode_item['authEndDate']) / 1000 ) or (episode_item['fullepisode'] == 'false'):
            show_name = episode_item['series'].split('/')[-1]
            url = episode_item['videoURL']
            episode_duration = int(episode_item['length'])
            episode_plot = episode_item['shortDescription']
            episode_name = episode_item['name']
            try:
                season_number = episode_item['season']
            except:
                season_number = -1
            try:
                episode_number = episode_item['episode']
            except:
                episode_number = -1
            try:
                episode_thumb = episode_item['videoStillURL']
            except:
                episode_thumb = None
            # endDate is epoch milliseconds; False means 'no expiry known'.
            try:
                episode_expires = int(episode_item['endDate']) / 1000
            except:
                episode_expires = False
            episode_mpaa = episode_item['rating']
            try:
                if episode_item['fullepisode'] == 'true':
                    episode_type = 'Full Episode'
                else:
                    episode_type = 'Clip'
            except:
                episode_type = 'Clip'
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels = { 'title': episode_name, 'durationinseconds': episode_duration, 'season': season_number, 'episode': episode_number, 'plot': episode_plot, 'premiered': episode_airdate, 'TVShowTitle': show_name, 'mpaa': episode_mpaa }
            infoLabels = common.enrich_infolabels(infoLabels, epoch=episode_expires)
            episodes.append((u, episode_name, episode_thumb, infoLabels, 'list_qualities', False, episode_type))
    return episodes
def register_person(conn):
    """Interactively register a new person row.

    Prompts for each attribute; returns None if the user aborts any
    prompt, True once the insert commits, False on a database error.
    """
    # SIN must be unique; keep prompting until an unused one is entered.
    while True:
        sin = common.read_string('SIN', 15)
        if sin is None:
            return None
        if not common.exists(conn, 'people', 'sin', sin):
            break
        print('A person with this social insurance number already exists!')
    name = common.read_string('Name', 20)
    if name is None:
        return None
    height = common.read_float('Height', 0)
    if height is None:
        return None
    weight = common.read_float('Weight', 0)
    if weight is None:
        return None
    eyecolor = common.read_string('Eye Color', 10)
    if eyecolor is None:
        return None
    haircolor = common.read_string('Hair Color', 10)
    if haircolor is None:
        return None
    addr = common.read_string('Address', 50)
    if addr is None:
        return None
    # Gender must be exactly 'm' or 'f'. (Fixed: a second, unreachable
    # `if gender == None` check after this loop was removed, and `== None`
    # comparisons were replaced with `is None`.)
    while True:
        gender = common.read_string('Gender (m/f)')
        if gender is None:
            return None
        if gender == 'm' or gender == 'f':
            break
        print('Please select either \'m\' for male or \'f\' for female!')
    birthday = common.read_date('Birthday')
    if birthday is None:
        return None
    try:
        curs = conn.cursor()
        curs.bindarraysize = 1
        curs.setinputsizes(15,40,float,float,10,10,50,1,8)
        # Parameterized insert; birthday is normalized via format_date.
        curs.executemany('insert into people values (:1,:2,:3,:4,:5,:6,:7,:8,to_date(:9,\'yyyymmdd\'))',
                         [(sin,name,height,weight,eyecolor,haircolor,addr,gender,format_date(birthday))])
        curs.close()
        conn.commit()
        return True
    except cx_Oracle.DatabaseError as e:
        error, = e.args
        # ORA-00001: unique constraint violated (duplicate SIN).
        if error.code == 1:
            print('Error: A person with this social insurance number already exists!')
        else:
            print('Unknown error', error.code,'!')
        return False
def add_videos_thetonightshow(url, type_, page = 1, added_episodes = None):
    """Collect Tonight Show videos of *type_* from the feed, walking pages
    recursively up to the 'maxpages' addon setting.

    added_episodes tracks ids already emitted across the recursive page
    walk so duplicates are skipped.
    """
    # Fixed: the old mutable default ([]) persisted between top-level
    # calls, so a second invocation silently dropped every episode seen
    # by the first one.
    if added_episodes is None:
        added_episodes = []
    episodes = []
    this_url = (TONIGHT_SHOW_FEED % url) + '&offset=' + str((page-1) * 10)
    root_data = connection.getURL(this_url)
    data = simplejson.loads(root_data)
    for video in data['content']:
        if video['type'] == type_:
            if type_ == 'episode':
                episode_name = video['name']
                episode_id = video['episodeNumber']
            else:
                episode_name = video['title']
                episode_id = video['id']
            if episode_id in added_episodes:
                continue
            added_episodes.append(episode_id)
            pid = video['videos'][0]['mpxPublicId']
            episode_url = SMIL % pid
            try:
                episode_plot = BeautifulSoup(video['description']['value'], 'html.parser').p.string
            except:
                episode_plot = ''
            try:
                episode_airdate = common.format_date(video['airDate'][:-6],'%Y-%m-%dT%H:%M:%S','%d.%m.%Y')
            except:
                episode_airdate = -1
            try:
                season_number = int(video['season'])
            except:
                season_number = -1
            try:
                episode_number = int(video['episodeNumber'])
            except:
                episode_number = -1
            try:
                episode_thumb = video['images'][0]['bitImageSmall']
            except:
                episode_thumb = None
            if video['type'] == 'episode':
                episode_type = 'Full Episode'
            else:
                episode_type = 'Clip'
            show_name = video['images'][0]['description']
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(episode_url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels={ 'title' : episode_name, 'season' : season_number, 'episode' : episode_number, 'plot' : episode_plot, 'premiered' : episode_airdate, 'TVShowTitle' : show_name }
            episodes.append((u, episode_name, episode_thumb, infoLabels, 'list_qualities', False, episode_type))
    # Recurse into the next feed page while under the configured limit.
    if page < int(addon.getSetting('maxpages')):
        episodes.extend(add_videos_thetonightshow(url, type_, page + 1, added_episodes))
    return episodes
def episodes(SITE, episode_url = common.args.url):
    """Build episode entries from a series JSON feed ('series'/'playlists').

    Returns a list of (url, name, thumb, infoLabels, qualities, HD, type)
    tuples for items whose distribution policy is 'playable'.
    """
    # NOTE(review): the default is evaluated at import time, so it captures
    # common.args.url as it was when this module loaded — confirm intended.
    episodes = []
    episode_data = connection.getURL(episode_url)
    episode_tree = simplejson.loads(episode_data)
    for episode_item in episode_tree['series']['playlists']:
        show_name = episode_tree['series']['title']
        episode_item = episode_item['playlist']
        # Episode name: prefer the text after '|', then after '- ', then ':'.
        if '|' in episode_item['headline']:
            episode_name = episode_item['headline'].split('|')[-1].strip()
        elif '- ' in episode_item['headline']:
            episode_name = episode_item['headline'].split('- ')[-1].strip()
        else:
            try:
                episode_name = episode_item['headline'].split(':')[1].strip()
            except:
                episode_name = episode_item['headline']
        try:
            # Titles like "S1 E12 ..." -> season / episode numbers.
            episode_info = re.compile('[s|S]([0-9]).[e|E]?([0-9]{0,2}).*').findall(episode_item['title'])
            try:
                episode_season, episode_number = episode_info[0]
            except:
                episode_season = episode_info
                episode_number = -1
        except:
            episode_season = -1
            episode_number = -1
        url = episode_item['id']
        try:
            episode_plot = episode_item['subhead']
        except:
            episode_plot = ''
        episode_thumb = episode_item['image']
        try:
            episode_duration = common.format_seconds(episode_item['duration']['timecode'])
        except:
            # No usable duration -> skip this item entirely.
            continue
        episode_type = episode_item['contentType'][:-1]
        episode_airdate = common.format_date(epoch = episode_item['postedDate']['timestamp'])
        u = sys.argv[0]
        u += '?url="' + urllib.quote_plus(url) + '"'
        u += '&mode="' + SITE + '"'
        u += '&sitemode="play"'
        infoLabels = {
            'title' : episode_name,
            'plot' : episode_plot,
            'durationinseconds' : episode_duration,
            'TVShowTitle' : show_name,
            'season' : episode_season,
            'episode' : episode_number,
            'premiered' : episode_airdate}
        try:
            if (episode_item['distributionPolicies'][0]['distributionPolicy']['policyType'] == 'playable'):
                episodes.append((u, episode_name, episode_thumb, infoLabels, 'list_qualities', False, episode_type))
        except:
            # Alternate feed layout: policyType lives one level up.
            if (episode_item['distributionPolicies'][0]['policyType'] == 'playable'):
                episodes.append((u, episode_name, episode_thumb, infoLabels, 'list_qualities', False, episode_type))
            else:
                pass
    return episodes
def oneDate_freshStocks(self, cdate, predate, mongodb):
    """Compute 'fresh' (recent-IPO) limit-up stocks for one trading day and
    persist them into the ZDT_by_date record for cdate.

    Derives, from yesterday's record and today's limit-up list:
    freshStocks (fresh stocks still limit-up), openedFreshStocks (fresh
    stocks whose streak broke today) and actulZtStocks ('natural'
    limit-ups excluding fresh stocks), all stored as '_'-joined strings.
    """
    predate = common.format_date(predate, "%Y%m%d")
    cdate = common.format_date(cdate, "%Y%m%d")
    yesResult = mongodb.stock.ZDT_by_date.find_one({"date": predate})
    yesFreshStocks = []
    # Today's candidates: yesterday's freshStocks plus yesterday's newly
    # listed stocks (Add_newStocks).
    if yesResult is not None and yesResult.has_key("freshStocks"):
        yesFreshStocks.extend(yesResult['freshStocks'].split("_"))
    if yesResult is not None and yesResult.has_key("Add_newStocks"):
        yesFreshStocks.extend(yesResult['Add_newStocks'].keys())
    # Add stocks that IPO'd today.
    todayNewadds = []
    todayResult = mongodb.stock.ZDT_by_date.find_one({"date": cdate})
    # NOTE(review): todayResult is not None-checked (unlike yesResult); a
    # missing record for cdate raises AttributeError here — confirm intended.
    if todayResult.has_key("Add_newStocks"):
        yesFreshStocks.extend(todayResult['Add_newStocks'].keys())
        todayNewadds.extend(todayResult['Add_newStocks'].keys())
    # De-duplicate.
    yesFreshStocks = list(set(yesFreshStocks))
    # Compare against today's limit-up list.
    todayZTs = todayResult['ZT_stocks'].split("_")  # today's limit-up stocks
    freshStocks = [x for x in yesFreshStocks if x in todayZTs]  # fresh stocks still limit-up today
    freshStocks.extend(todayNewadds)
    openedFreshStocks = [
        x for x in yesFreshStocks if x not in todayZTs and x not in todayNewadds
    ]  # fresh stocks whose limit-up streak broke today
    actulZtStocks = [x for x in todayZTs if x not in yesFreshStocks]  # 'natural' limit-ups, fresh stocks excluded
    freshStocks = "_".join(freshStocks)
    openedFreshStocks = "_".join(openedFreshStocks)
    actulZtStocks = "_".join(actulZtStocks)
    dicts = {
        "freshStocks": freshStocks,
        "actulZtStocks": actulZtStocks,
        "openedFreshStocks": openedFreshStocks
    }
    # Upsert into today's record.
    mongodb.stock.ZDT_by_date.update({"date": cdate}, {"$set": dicts}, True)
    print "update 次新/涨停去除次新 for %s" % cdate
def episodes(episode_url = common.args.url):
    """List episodes from a folder/playlist feed, optionally filtered by
    season via a '#season' suffix on the URL."""
    try:
        season_number = episode_url.split('#')[1]
    except:
        season_number = -1
    episode_url = episode_url.split('#')[0]
    episode_data = connection.getURL(episode_url)
    episode_menu = simplejson.loads(episode_data)['FolderList'][0]['PlaylistList'][0]
    for episode_item in episode_menu['MediaList']:
        if episode_item['Season'] == season_number or season_number == -1:
            ''' THX to foreverguest '''
            # Derive the base video path from the wide thumbnail (falling
            # back to the caption file path when the thumbnail fails).
            path_pattern = re.compile('http:\\/\\/.+?\/(.+?)_[a-zA-Z0-9]+')
            pattern_url = episode_item['Thumbnail_Wide']
            path = path_pattern.findall(pattern_url)
            if not path and episode_item['ClosedCaptionFiles']:
                path = path_pattern.findall(episode_item['ClosedCaptionFiles'][0]['Path'])
            if not path:
                continue
            video_url = BASE + path[0] + '_'
            episode_duration = int(episode_item['DurationInSeconds'])
            episode_name = episode_item['Title']
            episode_plot = episode_item['Description']
            try:
                episode_airdate = common.format_date(episode_item['ReleaseDate'], '%m/%d/%Y')
            except:
                episode_airdate = None
            try:
                episode_number = int(episode_item['Episode'])
            except:
                episode_number = -1
            try:
                episode_thumb = episode_item['Thumbnail_854x480']
            except:
                episode_thumb = None
            try:
                episode_caption = episode_item['ClosedCaptionFiles'][0]['Path']
            except:
                episode_caption = ''
            episode_MPAA = episode_item['Rating']
            episode_genre = episode_item['Genre']
            episode_showtitle = episode_item['ParentChannelName']
            # Caption URL is smuggled after '#'; the player splits it out.
            video_url = video_url + '#' + episode_caption
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(video_url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels = {
                'title' : episode_name,
                'durationinseconds' : episode_duration,
                'season' : season_number,
                'episode' : episode_number,
                'plot' : episode_plot,
                'premiered' : episode_airdate,
                'MPAA' : episode_MPAA,
                'Genre' : episode_genre,
                'TVShowTitle' : episode_showtitle}
            common.add_video(u, episode_name, episode_thumb, infoLabels = infoLabels, quality_mode = 'list_qualities')
    common.set_view('episodes')
def episodes_json(SITE):
    """List episodes from a JSON feed; common.args.url is
    'master_name#feed_url' (master_name flags generic titles)."""
    episode_url = common.args.url
    master_name = episode_url.split('#')[0]
    episode_url = episode_url.split('#')[1]
    episode_data = connection.getURL(episode_url)
    episode_menu = simplejson.loads(episode_data)
    for episode_item in episode_menu:
        url = episode_item['episodeID']
        try:
            episode_duration = episode_item['length']
        except:
            episode_duration = -1
        try:
            episode_airdate = common.format_date(episode_item['airDate'].split('on ')[1], '%B %d, %Y')
        except:
            episode_airdate = -1
        try:
            episode_plot = episode_item['summary']
        except:
            episode_plot = episode_item['shortdescription']
        episode_name = episode_item['title']
        if episode_name == master_name:
            # Generic title: pull the real headline from the episode page.
            video_url = EPISODE % url
            video_data = connection.getURL(video_url)
            video_tree = BeautifulSoup(video_data, 'html.parser')
            episode_name = video_tree.headline.string
        elif episode_name == "":
            episode_name = episode_plot
        try:
            # identifier looks like 'Season X, Episode Y'.
            season_number = int(episode_item['identifier'].split(',')[0].split(' ')[1])
        except:
            season_number = -1
        try:
            episode_number = int(episode_item['identifier'].split(', ')[1].split(' ')[1].replace(' Episode ', ''))
        except:
            try:
                episode_number = int(episode_item['identifier'].split(', ')[1].split(' ')[1])
            except:
                episode_number = -1
            # Implausible number: recover it from the share-URL redirect.
            if episode_number > 100:
                episode_number = int(re.compile('episode-(\d*)').findall(connection.getRedirect(episode_item['shareURL']))[0])
        try:
            episode_thumb = episode_item['640x360_jpg']
        except:
            episode_thumb = None
        u = sys.argv[0]
        u += '?url="' + urllib.quote_plus(url) + '"'
        u += '&mode="' + SITE + '"'
        u += '&sitemode="play_video"'
        infoLabels = {
            'title' : episode_name,
            'durationinseconds' : episode_duration,
            'season' : season_number,
            'episode' : episode_number,
            'plot' : episode_plot,
            'premiered' : episode_airdate }
        common.add_video(u, episode_name, episode_thumb, infoLabels = infoLabels, quality_mode = 'list_qualities')
    common.set_view('episodes')
def get_zt_high_list(self, date):
    """Return [limit-up list, intraday-high list] for the given date.

    Reads the ZDT_by_date record for `date` and splits its '_'-joined
    stock-code strings; either list is empty when its field is absent.
    Raises IndexError if no record exists for the date.
    """
    date = common.format_date(date, "%Y%m%d")
    mongoRecord = self.mongodb.stock.ZDT_by_date.find({"date": date})[0]
    ztList = []
    hiList = []
    # Idiom fix: membership test on the dict directly instead of building
    # an intermediate .keys() list (same semantics).
    # NOTE(review): presence of 'ZT_stocks' gates a read of 'actulZtStocks'
    # — confirm the two fields are always written together (see the
    # get_backsee / oneDate_freshStocks writers).
    if 'ZT_stocks' in mongoRecord:
        ztList = mongoRecord['actulZtStocks'].split("_")
    if 'HD_stocks' in mongoRecord:
        hiList = mongoRecord['HD_stocks'].split("_")
    return [ztList, hiList]
def get_zt_high_list(self, date):
    """Return [limit-up list, intraday-high list] for the given date.

    Reads the ZDT_by_date record for `date` and splits its '_'-joined
    stock-code strings; either list is empty when its field is absent.
    Raises IndexError if no record exists for the date.
    """
    date = common.format_date(date, "%Y%m%d")
    mongoRecord = self.mongodb.stock.ZDT_by_date.find({"date": date})[0]
    ztList = []
    hiList = []
    # Idiom fix: membership test on the dict directly instead of building
    # an intermediate .keys() list (same semantics).
    # NOTE(review): presence of 'ZT_stocks' gates a read of 'actulZtStocks'
    # — confirm the two fields are always written together.
    if 'ZT_stocks' in mongoRecord:
        ztList = mongoRecord['actulZtStocks'].split("_")
    if 'HD_stocks' in mongoRecord:
        hiList = mongoRecord['HD_stocks'].split("_")
    return [ztList, hiList]
def episodes(episode_url=common.args.url):
    """Build episode entries from a feed of concatenated JSON objects.

    Keeps items whose auth window has ended (or that are clips) and
    returns (url, name, thumb, infoLabels, qualities, HD, type) tuples.
    """
    episodes = []
    episode_data = connection.getURL(episode_url)
    # The feed concatenates JSON objects back-to-back; patch into a list.
    episode_menu = simplejson.loads(episode_data.replace("}{", "},{"))["results"]
    for episode_item in episode_menu:
        episode_airdate = common.format_date(episode_item["airdate"], "%Y-%m-%d", "%d.%m.%Y")
        # authEndDate is epoch milliseconds; include items past that window
        # (no longer auth-gated) plus anything that is not a full episode.
        if (episode_item["authEndDate"] is None or time.time() >= long(episode_item["authEndDate"]) / 1000) or (
            episode_item["fullepisode"] == "false"
        ):
            show_name = episode_item["series"].split("/")[-1]
            url = episode_item["videoURL"]
            episode_duration = int(episode_item["length"])
            episode_plot = episode_item["shortDescription"]
            episode_name = episode_item["name"]
            try:
                season_number = episode_item["season"]
            except:
                season_number = -1
            try:
                episode_number = episode_item["episode"]
            except:
                episode_number = -1
            try:
                episode_thumb = episode_item["videoStillURL"]
            except:
                episode_thumb = None
            try:
                episode_expires = int(episode_item["endDate"]) / 1000
            except:
                episode_expires = False
            episode_mpaa = episode_item["rating"]
            try:
                if episode_item["fullepisode"] == "true":
                    episode_type = "Full Episode"
                else:
                    episode_type = "Clip"
            except:
                episode_type = "Clip"
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels = {
                "title": episode_name,
                "durationinseconds": episode_duration,
                "season": season_number,
                "episode": episode_number,
                "plot": episode_plot,
                "premiered": episode_airdate,
                "TVShowTitle": show_name,
                "mpaa": episode_mpaa,
            }
            infoLabels = common.enrich_infolabels(infoLabels, epoch=episode_expires)
            episodes.append((u, episode_name, episode_thumb, infoLabels, "list_qualities", False, episode_type))
    return episodes
def add_clips(episode_tree, season_number=-1):
    """Collect clip entries from a parsed search-results page.

    Returns a list of (url, name, thumb, infoLabels, qualities, HD, type)
    tuples; best-effort — on any error returns whatever was gathered.
    """
    episodes = []
    try:
        episode_menu = episode_tree.find_all('div', class_='search_pad')
        for episode_item in episode_menu:
            show_name = episode_item.find('div', class_='search_show').text
            episode_name = episode_item.find(
                'div', class_='search_text').a.text.strip()
            episode_plot = episode_item.find(
                'div', class_='search_text').contents[4].strip()
            url = episode_item.find('div', class_='search_text').a['href']
            episode_thumb = episode_item.find(
                'div', class_='search_image').img['src'].split('?')[0]
            try:
                episode_airdate = episode_item.find(
                    'div', class_='episode_meta').contents[5].text.replace(
                        'Aired: ', '').strip()
                episode_airdate = common.format_date(episode_airdate,
                                                     '%B %d, %Y', '%d.%m.%Y')
            except:
                episode_airdate = -1
            try:
                # Duration rendered as '(mm:ss)'.
                episode_duration = common.format_seconds(
                    episode_item.find('span',
                                      class_='search_duration').text.replace(
                                          ')', '').replace('(', ''))
            except:
                episode_duration = -1
            try:
                episode_number = int(
                    episode_item.find(
                        'div', class_='episode_meta').contents[1].text.split('#')[1])
            except:
                episode_number = -1
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels = {
                'title': episode_name,
                'durationinseconds': episode_duration,
                'season': season_number,
                'episode': episode_number,
                'plot': episode_plot,
                'premiered': episode_airdate,
                'TVShowTitle': show_name
            }
            episodes.append((u, episode_name, episode_thumb, infoLabels,
                             'list_qualities', False, 'Clip'))
    except:
        pass
    return episodes
def oneDate_freshStocks(self, cdate, predate, mongodb):
    """Compute 'fresh' (recent-IPO) limit-up stocks for one trading day and
    persist them into the ZDT_by_date record for cdate.

    Derives freshStocks (still limit-up), openedFreshStocks (streak broke
    today) and actulZtStocks ('natural' limit-ups excluding fresh stocks),
    stored as '_'-joined strings.
    """
    predate = common.format_date(predate, "%Y%m%d")
    cdate = common.format_date(cdate, "%Y%m%d")
    yesResult = mongodb.stock.ZDT_by_date.find_one({"date": predate})
    yesFreshStocks = []
    # Today's candidates: yesterday's freshStocks plus yesterday's newly
    # listed stocks (Add_newStocks).
    if yesResult is not None and yesResult.has_key("freshStocks"):
        yesFreshStocks.extend(yesResult['freshStocks'].split("_"))
    if yesResult is not None and yesResult.has_key("Add_newStocks"):
        yesFreshStocks.extend(yesResult['Add_newStocks'].keys())
    # Add stocks that IPO'd today.
    todayNewadds = []
    todayResult = mongodb.stock.ZDT_by_date.find_one({"date": cdate})
    # NOTE(review): todayResult is not None-checked (unlike yesResult); a
    # missing record for cdate raises AttributeError here — confirm intended.
    if todayResult.has_key("Add_newStocks"):
        yesFreshStocks.extend(todayResult['Add_newStocks'].keys())
        todayNewadds.extend(todayResult['Add_newStocks'].keys())
    # De-duplicate.
    yesFreshStocks = list(set(yesFreshStocks))
    # Compare against today's limit-up list.
    todayZTs = todayResult['ZT_stocks'].split("_")  # today's limit-up stocks
    freshStocks = [x for x in yesFreshStocks if x in todayZTs]  # fresh stocks still limit-up today
    freshStocks.extend(todayNewadds)
    openedFreshStocks = [x for x in yesFreshStocks
                         if x not in todayZTs and x not in todayNewadds]  # streak broke today
    actulZtStocks = [x for x in todayZTs if x not in yesFreshStocks]  # 'natural' limit-ups, fresh stocks excluded
    freshStocks = "_".join(freshStocks)
    openedFreshStocks = "_".join(openedFreshStocks)
    actulZtStocks = "_".join(actulZtStocks)
    dicts = {
        "freshStocks": freshStocks,
        "actulZtStocks": actulZtStocks,
        "openedFreshStocks": openedFreshStocks
    }
    # Upsert into today's record.
    mongodb.stock.ZDT_by_date.update({"date": cdate}, {"$set": dicts}, True)
    print "update 次新/涨停去除次新 for %s" % cdate
def crawl_match_details(params):
    """Fetch one contestant's daily portfolio history and return it as a
    pandas DataFrame (one row per day).

    POSTs `params` to the module-level match_url with the shared session;
    returns an empty DataFrame on a non-200 response.
    """
    global s, match_url
    r = s.post(url=match_url, data=params, headers=general_headers, allow_redirects=True)
    if r.status_code != 200:
        print "Error when crawl %s, %s" % (params['lookedUserID'], params['lookedUserName'])
        return DataFrame()
    content = r.content.decode("utf-8")
    dict_content = json.loads(content)
    dframe = DataFrame()
    count = 0
    for dicts in dict_content['dto']['map']['listSp']:
        user = dicts['userName']  # contestant name
        date = dicts['endDateNum']  # trading date
        date = common.format_date(date, "%Y/%m/%d")
        fistMoney = dicts['firstMoney']  # initial assets
        preMoney = dicts['preMoney']  # yesterday's assets
        nowMoney = dicts['nowMoney']  # today's assets
        todayRate = float(dicts['todayRate']) / 100  # today's return
        lastRate = float(dicts['preRate']) / 100  # yesterday's return
        totalRate = float(dicts['totalRate']) / 100  # total return
        position = dicts['position']  # position / exposure
        stockName = []  # held stock names
        stockId = []  # held stock codes
        # Trade-record screenshot URLs are relative; rebase onto the CDN.
        urls = dicts['imgUrl'].split(u",")
        t_url = []
        for url in urls:
            if len(url) > 2:
                url = url.replace(u"../", u"")
                t_url.append(u"http://image.taoguba.com.cn/" + url)
        jgdurl = u",".join(t_url)
        for stockinfos in dicts['listStock']:
            stockName.append(stockinfos['stock']['stockName'])
            stockId.append(stockinfos['stockCode'])
        # Column labels are Chinese (name, date, initial/prev/current assets,
        # daily/prev/total returns, position, holdings, trade records).
        dframe.loc[count, u'名字'] = user
        dframe.loc[count, u'日期'] = date
        dframe.loc[count, u'初始资产'] = fistMoney
        dframe.loc[count, u'昨日资产'] = preMoney
        dframe.loc[count, u'今日资产'] = nowMoney
        dframe.loc[count, u'今日收益'] = todayRate
        dframe.loc[count, u'昨日收益'] = lastRate
        dframe.loc[count, u'总收益'] = totalRate
        dframe.loc[count, u'仓位'] = position
        dframe.loc[count, u'持有股票'] = "_".join(stockName)
        dframe.loc[count, u'持有股票ID'] = "_".join(stockId)
        dframe.loc[count, u'交割单'] = jgdurl
        count += 1
    return dframe
def baseInfo(self, tframe):
    """Extract the day's limit-up/limit-down/high/low stock lists from the
    price frame, merge in lists scraped from xgb news, and build the
    summary dict written to mongo.

    Returns [dicts, ZT_list, DT_list, HD_list, LD_list]; on error dicts is
    left empty and the traceback is logged.
    """
    dicts = {}
    ZT_list = []
    DT_list = []
    HD_list = []
    LD_list = []
    try:
        # Codes arrive as floats; restore the 6-digit zero-padded form.
        # str.zfill(6) replaces the original hand-rolled padding expression
        # (identical output for all lengths).
        ZT_list = [str(int(x)).zfill(6) for x in tframe['up10'].values if x > 0]
        DT_list = [str(int(x)).zfill(6) for x in tframe['dn10'].values if x > 0]
        HD_list = [str(int(x)).zfill(6) for x in tframe['high10'].values if x > 0]
        LD_list = [str(int(x)).zfill(6) for x in tframe['low10'].values if x > 0]
        ZT_list = list(set(ZT_list))
        DT_list = list(set(DT_list))
        HD_list = list(set(HD_list))
        LD_list = list(set(LD_list))
        # Merge in the lists scraped from xgb news.
        xgb_records = common.get_xgb_news()
        xgb_zt_list = []
        xgb_dt_list = []
        xgb_hd_list = []
        if len(xgb_records['ZT']) > 0:
            xgb_zt_list = xgb_records['ZT'].keys()
        if len(xgb_records['DT']) > 0:
            xgb_dt_list = xgb_records['DT'].keys()
        if len(xgb_records['HD']) > 0:
            xgb_hd_list = xgb_records['HD'].keys()
        ZT_list.extend(xgb_zt_list)
        ZT_list = list(set(ZT_list))
        ZT_list = [x for x in ZT_list if len(x) > 0]
        DT_list.extend(xgb_dt_list)
        DT_list = list(set(DT_list))
        DT_list = [x for x in DT_list if len(x) > 0]
        HD_list.extend(xgb_hd_list)
        HD_list = list(set(HD_list))
        HD_list = [x for x in HD_list if len(x) > 0]
        dicts['ZT_num'] = len(ZT_list)
        dicts['DT_num'] = len(DT_list)
        dicts['ZT_stocks'] = "_".join(ZT_list)
        dicts['DT_stocks'] = "_".join(DT_list)
        dicts['HD_stocks'] = "_".join(HD_list)
        dicts['LD_stocks'] = "_".join(LD_list)
        dicts['date'] = common.format_date(self.cdate, "%Y%m%d")
    except Exception:
        err = traceback.format_exc()
        logging.getLogger().error("【%s, extrace ZDHL Error】%s" % (self.cdate, err))
    return [dicts, ZT_list, DT_list, HD_list, LD_list]
def episodes(episode_url = common.args.url):
    """Build episode entries from a feed of concatenated JSON objects.

    Keeps items whose auth window has ended (or that are clips) and
    returns (url, name, thumb, infoLabels, qualities, HD, type) tuples.
    """
    episodes = []
    episode_data = connection.getURL(episode_url)
    # The feed concatenates JSON objects back-to-back; patch into a list.
    episode_menu = simplejson.loads(episode_data.replace('}{', '},{'))['results']
    for episode_item in episode_menu:
        episode_airdate = common.format_date(episode_item['airdate'], '%Y-%m-%d', '%d.%m.%Y')
        # authEndDate is epoch milliseconds; include items past that window
        # (no longer auth-gated) plus anything that is not a full episode.
        if (episode_item['authEndDate'] is None or time.time() >= long(episode_item['authEndDate'])/1000) or (episode_item['fullepisode'] == 'false'):
            show_name = episode_item['series'].split('/')[-1]
            url = episode_item['videoURL']
            episode_duration = int(episode_item['length'])
            episode_plot = episode_item['shortDescription']
            episode_name = episode_item['name']
            try:
                season_number = episode_item['season']
            except:
                season_number = -1
            try:
                episode_number = episode_item['episode']
            except:
                episode_number = -1
            try:
                episode_thumb = episode_item['videoStillURL']
            except:
                episode_thumb = None
            try:
                episode_expires = int(episode_item['endDate']) / 1000
            except:
                episode_expires = False
            episode_mpaa = episode_item['rating']
            try:
                if episode_item['fullepisode'] == 'true':
                    episode_type = 'Full Episode'
                else:
                    episode_type = 'Clip'
            except:
                episode_type = 'Clip'
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels = {
                'title' : episode_name,
                'durationinseconds' : episode_duration,
                'season' : season_number,
                'episode' : episode_number,
                'plot' : episode_plot,
                'premiered' : episode_airdate,
                'TVShowTitle' : show_name,
                'mpaa' : episode_mpaa }
            infoLabels = common.enrich_infolabels(infoLabels, epoch = episode_expires)
            episodes.append((u, episode_name, episode_thumb, infoLabels, 'list_qualities', False, episode_type))
    return episodes
def episodes(SITE, quality = True):
    """List episodes from a thePlatform media feed ('entries').

    Picks the default MPEG4 rendition per item and adds each video via
    common.add_video (with quality selection when `quality` is True).
    """
    episode_url = common.args.url
    episode_data = connection.getURL(episode_url)
    episode_menu = simplejson.loads(episode_data)['entries']
    for i, episode_item in enumerate(episode_menu):
        # Choose the default MPEG4 rendition, else the last MPEG4 seen.
        # NOTE(review): if an item has no MPEG4 rendition at all,
        # mpeg4_mediacontent is unbound and this raises NameError — confirm
        # the feed guarantees at least one MPEG4 entry per item.
        default_mediacontent = None
        for mediacontent in episode_item['media$content']:
            if (mediacontent['plfile$isDefault'] == True) and (mediacontent['plfile$format'] == 'MPEG4'):
                default_mediacontent = mediacontent
            elif (mediacontent['plfile$format'] == 'MPEG4'):
                mpeg4_mediacontent = mediacontent
        if default_mediacontent is None:
            default_mediacontent = mpeg4_mediacontent
        url = default_mediacontent['plfile$url']
        episode_duration = int(episode_item['media$content'][0]['plfile$duration'])
        episode_plot = episode_item['description']
        # pubDate is epoch milliseconds.
        episode_airdate = common.format_date(epoch = episode_item['pubDate']/1000)
        episode_name = episode_item['title']
        # Season/episode live under a per-item 'plN$...' key, with an
        # 'nbcu$...' fallback.
        try:
            season_number = int(episode_item['pl' + str(i + 1) + '$season'][0])
        except:
            try:
                season_number = int(episode_item['nbcu$seasonNumber'])
            except:
                season_number = -1
        try:
            episode_number = int(episode_item['pl' + str(i + 1) + '$episode'][0])
        except:
            try:
                episode_number = int(episode_item['nbcu$episodeNumber'])
            except:
                episode_number = -1
        try:
            episode_thumb = episode_item['plmedia$defaultThumbnailUrl']
        except:
            episode_thumb = None
        u = sys.argv[0]
        u += '?url="' + urllib.quote_plus(url) + '"'
        u += '&mode="' + SITE + '"'
        u += '&sitemode="play_video"'
        infoLabels = {
            'title' : episode_name,
            'durationinseconds' : episode_duration,
            'season' : season_number,
            'episode' : episode_number,
            'plot' : episode_plot,
            'premiered' : episode_airdate }
        if quality:
            common.add_video(u, episode_name, episode_thumb, infoLabels = infoLabels, quality_mode = 'list_qualities')
        else:
            common.add_video(u, episode_name, episode_thumb, infoLabels = infoLabels)
    common.set_view('episodes')
def get_backsee(self,sframe): try: # step 1: write "ZT_stocks,ZT_num,DT_stocks,DT_num" base info to mongo [dicts,ZT_list,DT_list,HD_list,LD_list]=self.baseInfo(sframe) # step 2: ZT/DT stocks, that got big amount # ZT_overMonut=self.overMount(ZT_list) # dicts['ZT_Mount']=ZT_overMonut # DT_overMonut=self.overMount(DT_list) # dicts['DT_Mount']=DT_overMonut # step 3: generate Add_newStocks, actulZtStocks, freshStocks, openedFreshedStocks print dicts['date'] yesterday = common.get_last_date(dicts['date']) yesterday = common.format_date(yesterday,"%Y%m%d") yesResults = self.mongodb.stock.ZDT_by_date.find_one({"date":yesterday}) freshStocks = [] if yesResults.has_key("freshStocks"): freshStocks = yesResults['freshStocks'].split("_") yesterdayNewAddStocks = [] if yesResults.has_key("Add_newStocks"): yesterdayNewAddStocks = yesResults['Add_newStocks'].keys() ## 新股加入到mongo中,每天刷新一次 Collect_NewStocks.fresh_newStockWebsite() todResults = self.mongodb.stock.ZDT_by_date.find_one({"date":dicts['date']}) newAddStocks = [] if todResults is not None and todResults.has_key("Add_newStocks"): newAddStocks = todResults['Add_newStocks'].keys() freshStocks.extend(newAddStocks) ## 自然涨停股票 dicts['actulZtStocks'] = "_".join([x for x in dicts['ZT_stocks'].split("_") if x not in freshStocks]) ## 连续涨停的次新股 freshStocks = [x for x in freshStocks if x in dicts['ZT_stocks']] # ZT_stocks可能不包含今日的股票(因为从tushare拿股票代码,不一定包括今天和昨天的新股, 可能还不包括前天的股票list) freshStocks.extend(newAddStocks) freshStocks.extend(yesterdayNewAddStocks) dicts['freshStocks'] = "_".join(list(set(freshStocks))) ## 开板次新股 dicts['openedFreshedStocks'] = "_".join([x for x in freshStocks if x not in dicts['ZT_stocks'] and x not in newAddStocks]) self.mongodb.stock.ZDT_by_date.update({"date":dicts['date']},{"$set":dicts},True) logging.getLogger().info("【getTenpercent summary】daily ZDT/actualZt. summary finished!") except: err = traceback.format_exc() logging.getLogger().info("【getTenpercent summary】daily ZDT/actualZt. summary finished!")
def copy_pool_dragon(self, from_date, end_date):
    """Carry the dragon pool forward from from_date to end_date.

    Starts from from_date's record and overlays any non-empty concept
    lists already stored for end_date, then upserts the merged record
    under end_date. Raises BaseException when from_date has no record.
    """
    from_date = common.format_date(from_date, "%Y%m%d")
    from_results = self.mongodb.concepts.dragon_pool.find({"date":from_date})
    if from_results.count() == 0:
        raise BaseException("No dragon pool record for date:%s, failed to copy" % from_date)
    # Yesterday's record.
    from_result = from_results[0]
    # Today's record currently in the database (may be absent).
    end_date = common.format_date(end_date, "%Y%m%d")
    end_results = self.mongodb.concepts.dragon_pool.find({"date":end_date})
    end_result = {}
    if end_results.count() > 0:
        end_result = end_results[0]
        del end_result['_id']
    # Merge the two; end_date's non-empty concept lists take precedence.
    del from_result['_id']
    del from_result['date']
    for concept in end_result:
        if len(end_result[concept]) > 0:
            from_result[concept] = end_result[concept]
    from_result['date'] = end_date
    self.mongodb.concepts.dragon_pool.update({"date":end_date}, from_result, upsert=True)
def add_fullepisodes(episode_tree, season_number=-1):
    """Collect full-episode entries from a parsed season page.

    Returns a list of (url, name, thumb, infoLabels, qualities, HD, type)
    tuples; best-effort — on any error returns whatever was gathered.
    """
    episodes = []
    try:
        episode_menu = episode_tree.find_all('div', class_='episodeContainer')
        for episode_item in episode_menu:
            episode_name = episode_item.find('div', class_='episodeTitle').a.text
            episode_airdate = common.format_date(
                episode_item.find('div',
                                  class_='episodeAirDate').contents[1].strip(),
                '%b %d, %Y', '%d.%m.%Y')
            episode_plot = episode_item.find(
                'div', class_='episodeDescription').contents[0].strip()
            episode_thumb = episode_item.find(
                'div', class_='episodeImage').img['src'].split('?')[0]
            url = episode_item.find('div', class_='episodeTitle').a['href']
            try:
                episode_duration = common.format_seconds(
                    episode_item.find('span',
                                      class_='episodeDuration').text.replace(
                                          ')', '').replace('(', ''))
            except:
                episode_duration = -1
            try:
                # NOTE(review): '#' + season_number assumes season_number is
                # a string here; with the int default -1 this raises and
                # falls back to -1 — confirm callers pass a string.
                episode_number = int(
                    episode_item.find('div',
                                      class_='episodeIdentifier').text.split(
                                          '#' + season_number)[1])
            except:
                episode_number = -1
            # Show name is recovered from the thumbnail URL path.
            show_name = episode_thumb.split('/')[5].replace('_', ' ').title()
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels = {
                'title': episode_name,
                'durationinseconds': episode_duration,
                'season': season_number,
                'episode': episode_number,
                'plot': episode_plot,
                'premiered': episode_airdate,
                'TVShowTitle': show_name
            }
            episodes.append((u, episode_name, episode_thumb, infoLabels,
                             'list_qualities', False, 'Full Episode'))
    except:
        pass
    return episodes
def add_fullepisodes_southpark(episode_tree):
    """Add full-episode videos for the season selected in the plugin URL.

    The season number is recovered from sys.argv[2]; items whose sXXeYY
    URL slug does not match that season are skipped. Best-effort: any
    error aborts silently.
    """
    try:
        # Season picked by the user, e.g. 'Season 5' -> '5'.
        season = urllib.unquote(
            sys.argv[2].split('&')[2].split('=')[1].replace('%22',
                                                            '')).split(' ')[1]
        episode_menu = episode_tree.find_all('article', class_='thumb')
        for episode_item in episode_menu:
            episode_name = episode_item.find(class_='title')
            if episode_name is None:
                continue
            url = episode_item.a['href']
            try:
                season_number, episode_number = re.compile(
                    's([0-9]{2})e([0-9]{2})').findall(url)[0]
            except:
                episode_number = -1
                season_number = -1
            if int(season) != int(season_number):
                continue
            episode_name = episode_name.string.strip()
            episode_plot = episode_item.find('p', class_='episode').string.strip()
            episode_airdate = episode_item.find(
                class_='air-date').string.strip()
            episode_airdate = common.format_date(episode_airdate, '%m.%d.%Y',
                                                 '%d.%m.%Y')
            # Thumb URL is embedded in the inline style background-image.
            episode_thumb = re.match(
                '(.*?)url\(\'(.*?)\'\)',
                episode_item.find('a', class_='fill')['style']).group(2)
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels = {
                'title': episode_name,
                'season': season_number,
                'episode': episode_number,
                'plot': episode_plot,
                'premiered': episode_airdate
            }
            common.add_video(u, episode_name, episode_thumb,
                             infoLabels=infoLabels,
                             quality_mode='list_qualities')
    except:
        pass
def episodes(episode_url = common.args.url):
    """List videos for a show; URL format is 'feed_url#fullep_flag'.

    Also mirrors episode thumbnails into DATAPATH/thumbs (skipping any
    already present in a Kodi texture DB) so the cached file can be used
    as the listing art.
    """
    # Start each run with a clean thumbnail cache directory.
    try:
        shutil.rmtree(os.path.join(ustvpaths.DATAPATH,'thumbs'))
    except:
        pass
    episode_data = connection.getURL(VIDEOLIST % episode_url.split('#')[0])
    episode_menu = simplejson.loads(episode_data)['videos']
    os.mkdir(os.path.join(ustvpaths.DATAPATH,'thumbs'))
    for episode_item in episode_menu:
        # '#' suffix selects full episodes vs. clips.
        if int(episode_item['fullep']) == int(episode_url.split('#')[1]):
            show_name = episode_item['series_name']
            url = episode_item['guid']
            episode_duration = int(episode_item['duration_secs'])
            episode_plot = episode_item['description_long']
            episode_name = episode_item['title']
            season_number = int(episode_item['season'])
            episode_thumb = episode_item['large_thumbnail']
            thumb_file = episode_thumb.split('/')[-1]
            thumb_path = os.path.join(ustvpaths.DATAPATH, 'thumbs', thumb_file)
            # Only download the thumb when no texture DB references it yet.
            thumbcount = 0
            for name in glob.glob(os.path.join(ustvpaths.DBPATH,'textures[0-9]*.db')):
                thumbcount = thumbcount + database.execute_command('select count(1) from texture where url = ?', [thumb_path,], fetchone = True, dbfile = name)[0]
            if thumbcount == 0:
                thumb_data = connection.getURL(episode_thumb)
                file = open(thumb_path, 'wb')
                file.write(thumb_data)
                file.close()
            try:
                # 'episode' is season-prefixed (e.g. '312' -> s3 e12);
                # strip the season digits to get the episode number.
                episode_number = int(episode_item['episode'][len(str(season_number)):])
            except:
                episode_number = -1
            try:
                episode_airdate = common.format_date(episode_item['airdate'],'%Y-%b-%d', '%d.%m.%Y')
            except:
                episode_airdate = -1
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels = {
                'title' : episode_name,
                'durationinseconds' : episode_duration,
                'season' : season_number,
                'episode' : episode_number,
                'plot' : episode_plot,
                'premiered' : episode_airdate,
                'tvshowtitle': show_name }
            common.add_video(u, episode_name, thumb_path, infoLabels = infoLabels)
    common.set_view('episodes')
def crawl_match_details(params):
    """Fetch one contestant's daily portfolio history and return it as a
    pandas DataFrame (one row per day).

    POSTs `params` to the module-level match_url with the shared session;
    returns an empty DataFrame on a non-200 response.
    """
    global s, match_url
    r = s.post(url=match_url, data=params, headers=general_headers, allow_redirects=True)
    if r.status_code != 200:
        print "Error when crawl %s, %s" %(params['lookedUserID'], params['lookedUserName'])
        return DataFrame()
    content = r.content.decode("utf-8")
    dict_content = json.loads(content)
    dframe = DataFrame()
    count = 0
    for dicts in dict_content['dto']['map']['listSp']:
        user = dicts['userName']  # contestant name
        date = dicts['endDateNum']  # trading date
        date = common.format_date(date, "%Y/%m/%d")
        fistMoney = dicts['firstMoney']  # initial assets
        preMoney = dicts['preMoney']  # yesterday's assets
        nowMoney = dicts['nowMoney']  # today's assets
        todayRate = float(dicts['todayRate'])/100  # today's return
        lastRate = float(dicts['preRate'])/100  # yesterday's return
        totalRate = float(dicts['totalRate'])/100  # total return
        position = dicts['position']  # position / exposure
        stockName = []  # held stock names
        stockId = []  # held stock codes
        # Trade-record screenshot URLs are relative; rebase onto the CDN.
        urls = dicts['imgUrl'].split(u",")
        t_url = []
        for url in urls:
            if len(url) > 2:
                url = url.replace(u"../", u"")
                t_url.append(u"http://image.taoguba.com.cn/" + url)
        jgdurl = u",".join(t_url)
        for stockinfos in dicts['listStock']:
            stockName.append(stockinfos['stock']['stockName'])
            stockId.append(stockinfos['stockCode'])
        # Column labels are Chinese (name, date, initial/prev/current assets,
        # daily/prev/total returns, position, holdings, trade records).
        dframe.loc[count, u'名字'] = user
        dframe.loc[count, u'日期'] = date
        dframe.loc[count, u'初始资产'] = fistMoney
        dframe.loc[count, u'昨日资产'] = preMoney
        dframe.loc[count, u'今日资产'] = nowMoney
        dframe.loc[count, u'今日收益'] = todayRate
        dframe.loc[count, u'昨日收益'] = lastRate
        dframe.loc[count, u'总收益'] = totalRate
        dframe.loc[count, u'仓位'] = position
        dframe.loc[count, u'持有股票'] = "_".join(stockName)
        dframe.loc[count, u'持有股票ID'] = "_".join(stockId)
        dframe.loc[count, u'交割单'] = jgdurl
        count += 1
    return dframe
def test_csv_file_contents(self):
    """The daily-price CSV must match the prices served at constant.URL."""
    # Read the CSV and convert prices from string to float, keyed by date.
    with open(constant.DAILY_PRICE_FILE_PATH, mode='r') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip the header row
        csv_file_contents = {rows[0]: make_float(rows[1]) for rows in reader}
    # Fetch the live data and key it by the same formatted date string.
    price_data = common.get_data(constant.URL)['series'][0]['data']
    price_data_dict = {}
    for i, date_and_price in enumerate(price_data):
        price_data[i][0] = common.format_date(date_and_price[0])
        price_data_dict[price_data[i][0]] = price_data[i][1]
    # assertEqual instead of assertTrue(a == b): on failure unittest prints
    # a dict diff rather than just 'False is not true'.
    self.assertEqual(csv_file_contents, price_data_dict)
def updatefreshlist(self): tardate = common.get_lastN_date(self.today, 120) tardate = common.format_date(tardate, "%Y%m%d") results = self.mongodb.stock.ZDT_by_date.find({"date": {"$gte": tardate, "$lte": self.today},"Add_newStocks": {"$exists": True}}) for result in results: tmp = result['Add_newStocks'].keys() if u'未公布' in tmp: tmp.remove(u'未公布') self.freshList.update(tmp) tmp = list(self.freshList) todayfresh = self.mongodb.stock.ZDT_by_date.find_one({'date':self.yestoday})['freshStocks'] tmp = [x for x in self.freshList if x not in todayfresh] self.freshList = set(tmp) self.mongodb.stock.ZDT_by_date.update({"date":self.today},{"$set":{"monitorFreshStocks":"_".join(self.freshList),"date":self.today}},True) print "update Monitored fresh stocks finished!"
def episodes(episode_url = common.args.url):
    """List CBS episodes from the JSON feed at episode_url.

    PREMIUM items are included only when the 'cbs_use_login' setting is
    enabled; each playable item is added via common.add_video.
    """
    episode_data = connection.getURL(episode_url)
    episode_json = simplejson.loads(episode_data)['result']
    episode_menu = episode_json['data']
    title = episode_json['title']
    for episode_item in episode_menu:
        if episode_item['status'] == 'AVAILABLE' or (addon.getSetting('cbs_use_login') == 'true' and episode_item['status'] == 'PREMIUM'):
            videourl = episode_item['streaming_url']
            # HD flag inferred from the stream URL naming convention.
            if '_hd_' in videourl:
                HD = True
            else:
                HD = False
            url = BASE + episode_item['url']
            episode_duration = int(common.format_seconds(episode_item['duration']))
            episode_airdate = common.format_date(episode_item['airdate'], '%m/%d/%y')
            # Prefer the shorter, non-empty 'label' over 'title'.
            if len(episode_item['label']) < len(episode_item['title']) and episode_item['label']:
                episode_name = episode_item['label']
            else:
                episode_name = episode_item['title']
            try:
                season_number = int(episode_item['season_number'])
            except:
                season_number = -1
            try:
                episode_number = int(episode_item['episode_number'])
            except:
                episode_number = -1
            try:
                episode_thumb = episode_item['thumb']['large']
            except:
                episode_thumb = None
            episode_plot = episode_item['description']
            if url is not None:
                u = sys.argv[0]
                u += '?url="' + urllib.quote_plus(url) + '"'
                u += '&mode="' + SITE + '"'
                u += '&sitemode="play_video"'
                infoLabels = {
                    'title' : episode_name,
                    'durationinseconds' : episode_duration,
                    'season' : season_number,
                    'episode' : episode_number,
                    'plot' : episode_plot,
                    'premiered' : episode_airdate }
                common.add_video(u, episode_name, episode_thumb, infoLabels = infoLabels, quality_mode = 'list_qualities', HD = HD)
            else:
                pass
    common.set_view('episodes')
def scan(self):
    """Scan the day's limit-up lock/open events and render review curves.

    Step 1 builds a per-minute frame with lock counts (Num/Details) and
    open counts (oNum/oDetails); step 2 appends one aggregated curve per
    concept and writes everything to an HTML review page.
    """
    # Step 1: scan for limit-up events, shaped like
    # {u'09:30': [2, '603819,603987']}, {u'09:49': [1, '000022']}
    self.closeZt, self.openZt = self.get_zt_details()
    # Per-minute frame recording lock/open counts and the stocks involved.
    dataframe = common.get_minly_ratio_frame(["ZS000001"], self.day)
    dataframe.set_index('barTime', inplace=True)
    # Seed a zero at the close so empty days still plot.
    if len(self.closeZt) == 0:
        dataframe.loc['15:00', 'Num'] = 0
        # dataframe.loc['15:00', 'Details'] = ""
    for bartime in self.closeZt:
        bartime = bartime.encode("utf-8")
        dataframe.loc[bartime, 'Num'] = self.closeZt[bartime][0]
        dataframe.loc[bartime, 'Details'] = self.closeZt[bartime][1]
    for bartime in self.openZt:
        bartime = bartime.encode("utf-8")
        dataframe.loc[bartime, 'oNum'] = self.openZt[bartime][0]
        dataframe.loc[bartime, 'oDetails'] = self.openZt[bartime][1]
    if len(self.openZt) == 0:
        dataframe.loc['15:00', 'oNum'] = 0
        # dataframe.loc['15:00', 'oDetails'] = " "
    dataframe.fillna(value="'-'", inplace=True)
    # Step 2: one aggregated stock curve per concept for the day.
    self.dataframe = dataframe
    dframe_list = [dataframe]
    title_list = ['']
    type_list = [2]
    concept_dict = common.get_concept_list(self.day)
    for concept in concept_dict.keys():
        # print concept
        stock_list = concept_dict[concept]
        # Include the Shanghai composite index as a reference line.
        stock_list.append("ZS000001")
        tmp_dframe = common.get_dataframe_option1(stock_list, self.day)
        dframe_list.append(tmp_dframe)
        title_list.append(concept)
        type_list.append(1)
    # Render the HTML review file into the per-day directory.
    save_day = common.format_date(self.day, "%Y%m%d")
    common.get_html_curve(dframe_list, u"market_eye", html_types=type_list,
                          title_list=title_list,
                          save_dir=os.path.join(
                              u"D:/Money/modeResee/复盘/%s" % save_day, ""))
def episodes(episode_url=common.args.url):
    """Collect playable episode tuples from the JSON feed at *episode_url*.

    Returns a list of (url, name, thumb, infoLabels, quality_mode, HD,
    episode_type) tuples."""
    results = []
    feed = simplejson.loads(connection.getURL(episode_url))
    for item in feed['items']:
        # Entries without any video renditions cannot be played.
        if not item['videos']:
            continue
        guid = item['guid']
        name = item['title']
        plot = item['description']
        airdate = common.format_date(item['airdate'], '%Y-%m-%d %H:%M:%S', '%d.%m.%Y')
        # Rendition length is reported in milliseconds.
        duration = int(item['videos'].itervalues().next()['length']) / 1000
        images = item['images']
        # Thumbnail fallback chain: 16x9 -> 4x3 -> generic mezzanine.
        try:
            thumb = images['kids-mezzannine-16x9']['url']
        except:
            try:
                thumb = images['kids-mezzannine-4x3']['url']
            except:
                thumb = images['mezzanine']['url']
        HD = False
        for rendition in item['videos']['flash'].itervalues():
            try:
                if rendition['bitrate'] > 2000:
                    HD = True
            except:
                pass
        episode_type = 'Full ' + item['type']
        show_name = item['series_title']
        u = '%s?url="%s"&mode="%s"&sitemode="play_video"' % (sys.argv[0], urllib.quote_plus(guid), SITE)
        infoLabels = {
            'title': name,
            'durationinseconds': duration,
            'plot': plot,
            'premiered': airdate,
            'TVShowTitle': show_name
        }
        results.append((u, name, thumb, infoLabels, 'select_quailty', HD, episode_type))
    return results
def add_clips(episode_tree): episodes = [] try: try: episode_menu = episode_tree.find(class_ ='clips').find_all(class_= 'clip') except: episode_menu = episode_tree.find_all(class_ = 'block') for episode_item in episode_menu: try: episode_name = common.replace_signs(episode_item.find('a', class_ = 'title').text) except: episode_name = common.replace_signs(episode_item.find('h3').a.text) episode_plot = common.replace_signs(episode_item.find('p', class_ = False).text) episode_thumb = episode_item.find('img')['src'].split('?')[0] try: url = episode_item.find('a', class_ = 'title')['href'] except: url = episode_item.find('div', class_ = 'thumb_area').a['href'] try: try: episode_airdate = episode_item.find('div', class_ ='info').contents[-1].split(' ', 1)[1].strip() except: episode_airdate = episode_item.find('div', class_ = 'details').find('small', text = re.compile('Posted')).text.split(' ', 1)[1].strip() episode_airdate = common.format_date(episode_airdate, '%B %d, %Y', '%d.%m.%Y') except: episode_airdate = -1 try: episode_duration = re.compile('\((.*)\)').findall(episode_name)[0] episode_name = re.compile('(.*)\s\(.*\)').findall(episode_name)[0] episode_duration = common.format_seconds(episode_duration) except: try: episode_duration = common.format_seconds(episode_item.find('h3').small.text.replace(')', '').replace('(', '')) except: episode_duration = -1 u = sys.argv[0] u += '?url="' + urllib.quote_plus(url) + '"' u += '&mode="' + SITE + '"' u += '&sitemode="play_video"' infoLabels = { 'title' : episode_name, 'durationinseconds' : episode_duration, 'plot' : episode_plot, 'premiered' : episode_airdate } episodes.append((u, episode_name, episode_thumb, infoLabels, None, False, 'Clip')) except Exception, e: pass
def episodes(episode_url = common.args.url):
    """Collect full-episode tuples from the JSON feed at *episode_url*.

    Returns a list of (url, name, thumb, infoLabels, quality_mode, HD,
    episode_type) tuples."""
    episodes = []
    episode_data = connection.getURL(episode_url)
    episode_menu = simplejson.loads(episode_data)
    for episode_item in episode_menu['items']:
        if episode_item['videos']:
            url = episode_item['guid']
            episode_name = episode_item['title']
            episode_plot = episode_item['description']
            episode_airdate = common.format_date(episode_item['airdate'], '%Y-%m-%d %H:%M:%S', '%d.%m.%Y')
            # Rendition length is reported in milliseconds.
            episode_duration = int(episode_item['videos'].itervalues().next()['length']) / 1000
            # Thumbnail fallback chain: 16x9 -> 4x3 -> generic mezzanine.
            try:
                episode_thumb = episode_item['images']['kids-mezzannine-16x9']['url']
            except:
                try:
                    episode_thumb = episode_item['images']['kids-mezzannine-4x3']['url']
                except:
                    episode_thumb = episode_item['images']['mezzanine']['url']
            HD = False
            for video in episode_item['videos']['flash'].itervalues():
                try:
                    if video['bitrate'] > 2000:
                        HD = True
                except:
                    pass
            episode_type = 'Full ' + episode_item['type']
            show_name = episode_item['series_title']
            # Titles embed "(Ep. SEE)": one season digit then two episode
            # digits.  Fix: the two groups were previously assigned the
            # wrong way round (season stored as episode and vice versa)
            # and left as strings on success; cast to int like the other
            # site modules do.
            try:
                episode_season, episode_number = re.compile(r'\(Ep\. ([0-9])([0-9][0-9])\)').search(episode_name).groups()
                episode_season = int(episode_season)
                episode_number = int(episode_number)
            except:
                episode_number = -1
                episode_season = -1
            u = sys.argv[0]
            u += '?url="' + urllib.quote_plus(url) + '"'
            u += '&mode="' + SITE + '"'
            u += '&sitemode="play_video"'
            infoLabels = {
                'title' : episode_name,
                'durationinseconds' : episode_duration,
                'plot' : episode_plot,
                'premiered' : episode_airdate,
                'TVShowTitle' : show_name,
                'season' : episode_season,
                'episode' : episode_number}
            episodes.append((u, episode_name, episode_thumb, infoLabels, 'select_quailty', HD, episode_type))
    return episodes
def get_backsee(self, sframe):
    """Build the end-of-day review record for one trading day and upsert
    it into mongo (stock.ZDT_by_date).

    sframe: snapshot frame handed straight to baseInfo(); its schema is
    defined by the caller and not inspected here.
    """
    # Step 1: base stats.  dicts carries ZT_stocks/ZT_num/DT_stocks/DT_num
    # plus the 'date' key used below; ZT/DT lists are limit-up/limit-down
    # stocks (HD_list/LD_list are unused in this method).
    [dicts, ZT_list, DT_list, HD_list, LD_list] = self.baseInfo(sframe)
    # Step 2: limit-up / limit-down stocks that traded a large amount.
    ZT_overMonut = self.overMount(ZT_list)
    dicts['ZT_Mount'] = ZT_overMonut
    DT_overMonut = self.overMount(DT_list)
    dicts['DT_Mount'] = DT_overMonut
    # Step 3: derive Add_newStocks, actulZtStocks, freshStocks and
    # openedFreshedStocks from yesterday's record plus today's listings.
    print dicts['date']
    yesterday = common.get_last_date(dicts['date'])
    yesterday = common.format_date(yesterday, "%Y%m%d")
    # NOTE(review): find_one returns None when there is no record for
    # yesterday, and has_key would then raise -- assumes the previous
    # trading day's record always exists; confirm.
    yesResults = self.mongodb.stock.ZDT_by_date.find_one(
        {"date": yesterday})
    freshStocks = []
    if yesResults.has_key("freshStocks"):
        freshStocks = yesResults['freshStocks'].split("_")
    # Refresh the new-stock (IPO) list in mongo, once per day.
    Collect_NewStocks.fresh_newStockWebsite()
    todResults = self.mongodb.stock.ZDT_by_date.find_one(
        {"date": dicts['date']})
    newAddStocks = []
    if todResults.has_key("Add_newStocks"):
        newAddStocks = todResults['Add_newStocks'].keys()
    freshStocks.extend(newAddStocks)
    # "Natural" limit-up stocks: today's ZT list minus the recent IPOs.
    dicts['actulZtStocks'] = "_".join(
        [x for x in dicts['ZT_stocks'].split("_") if x not in freshStocks])
    # IPO stocks still on a limit-up streak (plus today's new listings).
    # NOTE(review): `x in dicts['ZT_stocks']` is a substring test on the
    # "_"-joined string, not list membership -- presumably intended.
    freshStocks = [x for x in freshStocks if x in dicts['ZT_stocks']]
    freshStocks.extend(newAddStocks)
    dicts['freshStocks'] = "_".join(list(set(freshStocks)))
    # IPO stocks whose streak broke ("opened") today.
    # NOTE(review): freshStocks was just filtered to ZT members and
    # extended with newAddStocks, so this comprehension looks always
    # empty -- verify whether the pre-filter list was meant here.
    dicts['openedFreshedStocks'] = "_".join([
        x for x in freshStocks
        if x not in dicts['ZT_stocks'] and x not in newAddStocks
    ])
    self.mongodb.stock.ZDT_by_date.update({"date": dicts['date']},
                                          {"$set": dicts}, True)
def fresh_newStockWebsite(): pages = [1, 2, 3, 4] # pages = [1] source = 1 # 0代表中财网数据引擎, 1代表东方财富网站 mongoUrl = "localhost" global mongodb mongodb = pymongo.MongoClient(mongoUrl) for page in pages: if source == 0: url = "http://data.cfi.cn/cfidata.aspx?sortfd=&sortway=&curpage=%s&fr=content&ndk=A0A1934A1939A1946A1982&xztj=&mystockt=" % page else: url = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=NS&sty=NSSTV5&st=12&sr=-1&p=%s&ps=50" % page print "scan new stock page %s" % page for i in range(0, 5): r = requests.get(url=url) if r.status_code == 200: break else: time.sleep(2) print "connected page %s successfully" % page content = r.content try: if source == 0: [stock_ids, stock_dates, stock_types] = parse_content(content) else: [stock_ids, stock_dates, stock_types] = parse_dc_content(content) except Exception, e: print e for i in range(len(stock_ids)): stockId = stock_ids[i].strip() stockDate = stock_dates[i].strip() stockDate = common.format_date(stockDate, "%Y%m%d") if stockDate == "--" or stockDate > datetime.datetime.today( ).strftime("%Y%m%d"): continue else: update_Mongo(stockId, stockDate)
def get_group_frame(start_date, end_date, target_concept, tframe):
    """Per-day distribution of gain buckets for concept stocks that have a
    limit-up history.

    Returns (group_frame, daily_frame): group_frame has columns
    date / ratio / count / data_flag; daily_frame is the raw per-stock
    daily-rate frame for the whole concept over the date range.
    """
    # Daily gain data for every stock of the concept in the date range.
    daily_frame = get_all_concept_stocks_daily_rate(
        start_date, end_date, target_concept, tframe)
    # For each trading day, the watch list (stocks with past limit-ups).
    watch_lists = get_daily_forcuse_stock(daily_frame)
    # Per-day slices of the daily frame restricted to that day's list.
    slices = [
        daily_frame[(daily_frame.TRADE_DATE == day)
                    & (daily_frame.TICKER_SYMBOL.isin(stocks))]
        for day, stocks in watch_lists.items()
    ]
    watched = pd.concat(slices, axis=0)
    # Count how many watched stocks landed in each rate bucket per day.
    grouped = watched.groupby(
        ['TRADE_DATE', 'rate'])['TICKER_SYMBOL'].count().reset_index()
    # Convert to the column names / date format the html layer expects.
    grouped['TRADE_DATE'] = grouped['TRADE_DATE'].apply(
        lambda d: common.format_date(d, '%Y-%m-%d'))
    grouped.rename(columns={
        "TRADE_DATE": "date",
        "TICKER_SYMBOL": "count",
        "rate": "ratio"
    }, inplace=True)
    grouped['data_flag'] = 1
    return grouped, daily_frame