def save_typing(self, update, context):
    sign_ask_for = self.base_handler.get_data(
        update, context, self.base_handler.SIGNIN_ASK_FOR, None)
    if not sign_ask_for:
        return
    session = Session.get_from(context.user_data)
    go_to = None
    if re.search('odoo_connector_url$', sign_ask_for):
        session.set_auth_args(url=update.message.text)
        go_to = self.workflow_connector(update, context)
    elif re.search('odoo_connector_db$', sign_ask_for):
        session.set_auth_args(database=update.message.text)
        go_to = self.workflow_connector(update, context)
    elif re.search('username$', sign_ask_for):
        session.set_auth_args(username=update.message.text)
    elif re.search('password$', sign_ask_for):
        session.set_auth_args(password=update.message.text)
    Session.set_from(context.user_data, session)
    #self.base_handler.set_data(update, context,
    #    self.base_handler.SIGN_HANDLER_MSG,
    #    'Got it! Please select to update.'
    #)
    return go_to or self.workflow_connector(update, context)
def enter():
    print('enter')
    session = Session()
    r = session.get(URL_ENTER) # 862
    html = r.text
    soup = Soup(html)
    box = soup.find('aside', id='FilterBox')
    data = {}
    for select in box.findAll('select'):
        name = select.attrs['name']
        value = select.findAll('option')[-1].attrs['value']
        print(name, value)
        data[name] = value
    for input in box.findAll('input'):
        name = input.attrs['name']
        value = input.attrs['value']
        if name.startswith('rating_') or 'CSRF_TOKEN' in name:
            print(name, value)
            data[name] = value
    data.update({
        'filter_media': 'A',
        'filter_order': 'date_new',
        'filter_type': '0',
    })
    r = session.post(URL_FILTER, data=data, headers={'Referer': r.url})
    print(r)
    return session
def get_videos(url, cw=None):
    print_ = get_print(cw)
    info = {}
    user_id = re.find(r'twitch.tv/([^/?]+)', url, err='no user_id')
    print(user_id)
    session = Session()
    r = session.get(url)
    s = cut_pair(re.find(r'headers *: *({.*)', r.text, err='no headers'))
    print(s)
    headers = json_loads(s)

    payload = [{
        'operationName': 'ClipsCards__User',
        'variables': {
            'login': user_id,
            'limit': 20,
            'criteria': {
                'filter': 'ALL_TIME'
            }
        },
        'extensions': {
            'persistedQuery': {
                'version': 1,
                'sha256Hash': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777'
            }
        },
    }]
    videos = []
    cursor = None
    cursor_new = None
    while True:
        if cursor:
            payload[0]['variables']['cursor'] = cursor
        r = session.post('https://gql.twitch.tv/gql', json=payload, headers=headers)
        #print(r)
        data = r.json()
        for edge in data[0]['data']['user']['clips']['edges']:
            url_video = edge['node']['url']
            info['name'] = edge['node']['broadcaster']['displayName']
            video = Video(url_video)
            video.id = int(edge['node']['id'])
            videos.append(video)
            cursor_new = edge['cursor']
        print_('videos: {} / cursor: {}'.format(len(videos), cursor))
        if cursor == cursor_new:
            print_('same cursor')
            break
        if cursor_new is None:
            break
        cursor = cursor_new
    if not videos:
        raise Exception('no videos')
    info['videos'] = sorted(videos, key=lambda video: video.id, reverse=True)
    return info
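# A minimal, generic skeleton of the cursor-pagination loop that
# get_videos implements above. fetch_page is a hypothetical callable
# returning (items, next_cursor), with next_cursor None on the last page.
def paginate(fetch_page):
    items, cursor = [], None
    while True:
        page_items, cursor_new = fetch_page(cursor)
        items += page_items
        if cursor_new is None or cursor_new == cursor:
            break  # last page, or the API stopped advancing the cursor
        cursor = cursor_new
    return items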
def main():
    from data.const import get_trade_calendar
    from datetime import time
    trade_cal = get_trade_calendar()

    def transform_tick(tick):
        ret = None
        if tick in ['000001', '000300']:
            ret = ''.join((tick, '_SH_IX'))
        else:
            ret = ''.join((tick, '_SZ_IX'))
        return ret

    today = str(datetime.now().date())
    today_idx = trade_cal.index.searchsorted(today)
    yesterday = str(trade_cal.index[today_idx - 1].date())
    tomorrow = str(trade_cal.index[today_idx + 1].date())
    from_date, to_date = None, None
    if datetime.now().time() > time(15, 10, 0, 0):  # past 15:10
        from_date, to_date = today, tomorrow
    else:
        from_date, to_date = yesterday, today

    index_list = ['000001', '000300', '399001', '399005', '399006']
    get_config_func = make_config(CONFIG_FILE)
    ret = predict(ticks=index_list, look_back=519, index=True,
                  get_config_func=get_config_func)
    ret = ret.sort_values(by='dt')
    print(ret)

    engines = [engine_company_local, engine_company_outer]
    for engine in engines:
        Base.metadata.bind = engine
        # Base.metadata.create_all(engine)
        Session.configure(bind=engine)
        session = Session()
        IndexPrediction().to_sql(session, ret)

    hist_rec = pd.read_sql_table('index_prediction', engine_company_outer)
    win_ratio = (hist_rec.dropna()
                 .query('dt>"2014-01-01"')
                 .groupby('secu')
                 .apply(lambda df: np.sum(np.isclose(df.pred, df.fact)) * 1.0 / len(df))
                 .to_frame()
                 .rename(columns={0: 'accuracy'})
                 .reset_index())
    tomorrow_pred = ret.query("""dt == '{}'""".format(to_date))
    to_seven_liu = pd.merge(tomorrow_pred, win_ratio, on='secu')
    logger.info(u"Tomorrow's prediction results:\n{}".format(to_seven_liu))
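# What the win_ratio block in main() computes, shown on a tiny made-up
# frame: the per-secu share of rows where prediction matched fact.
import numpy as np
import pandas as pd

hist = pd.DataFrame({'dt': ['2015-01-05'] * 4,
                     'secu': ['000001_SH_IX'] * 2 + ['399001_SZ_IX'] * 2,
                     'pred': [1.0, 0.0, 1.0, 1.0],
                     'fact': [1.0, 1.0, 1.0, 0.0]})
win_ratio = (hist.groupby('secu')
             .apply(lambda df: np.isclose(df.pred, df.fact).mean())
             .rename('accuracy')
             .reset_index())
print(win_ratio)  # 000001_SH_IX -> 0.5, 399001_SZ_IX -> 0.5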
def __get_cookie(self) -> Session:
    session = requests.Session()
    user_key = Session().cookies.get("USERKEY", domain=".novelpia.com")
    login_key = Session().cookies.get("LOGINKEY", domain=".novelpia.com")
    if user_key and login_key:
        session.cookies.set("USERKEY", user_key, domain=".novelpia.com")
        session.cookies.set("LOGINKEY", login_key, domain=".novelpia.com")
    return session
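# Minimal sketch of the cookie-copy pattern __get_cookie uses, written
# against plain requests only (cookie names and domain as in the code
# above; src is a hypothetical session already holding the login cookies):
import requests

def copy_login_cookies(src: requests.Session) -> requests.Session:
    dst = requests.Session()
    user_key = src.cookies.get("USERKEY", domain=".novelpia.com")
    login_key = src.cookies.get("LOGINKEY", domain=".novelpia.com")
    if user_key and login_key:  # copy only a complete login pair
        dst.cookies.set("USERKEY", user_key, domain=".novelpia.com")
        dst.cookies.set("LOGINKEY", login_key, domain=".novelpia.com")
    return dst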
def get_video(url, session=None):
    if session is None:
        session = Session()
        session.headers['User-Agent'] = downloader.hdr['User-Agent']
        session.headers['X-Directive'] = 'api'
    html = downloader.read_html(url, session=session)
    soup = Soup(html)
    for script in soup.findAll('script'):
        script = script.text or script.string or ''
        data = re.find('window.__NUXT__=(.+)', script)
        if data is not None:
            data = data.strip()
            if data.endswith(';'):
                data = data[:-1]
            data = json.loads(data)
            break
    else:
        raise Exception('No __NUXT__')

    info = data['state']['data']['video']['hentai_video']
    query = info['slug']
    #url_api = 'https://members.hanime.tv/api/v3/videos_manifests/{}?'.format(query) # old
    url_api = 'https://hanime.tv/rapi/v7/videos_manifests/{}?'.format(query) # new
    print(url_api)

    hdr = {
        'x-signature': ''.join('{:x}'.format(randrange(16)) for i in range(32)),
        'x-signature-version': 'web2',
        'x-time': str(int(time())),
    }
    r = session.get(url_api, headers=hdr)
    print(r)
    data = json.loads(r.text)

    streams = []
    for server in data['videos_manifest']['servers']:
        streams += server['streams']
    streams_good = []
    for stream in streams:
        url_video = stream['url']
        if not url_video or 'deprecated.' in url_video:
            continue
        streams_good.append(stream)
    if not streams_good:
        raise Exception('No video available')
    print('len(streams_good):', len(streams_good))
    for stream in streams_good:
        print(stream['extension'], stream['width'], stream['filesize_mbs'], stream['url'])

    stream = streams_good[0]
    return Video(info, stream), session
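# The x-signature header in get_video is just 32 random hex digits; the
# stdlib secrets module produces the same shape in a single call:
import secrets

signature = secrets.token_hex(16)  # 16 random bytes -> 32 hex characters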
def read_channel(url_page, cw=None):
    print_ = get_print(cw)
    res = re.find(CHANNEL_PATTERN, url_page)
    if res is None:
        raise Exception('Not channel')
    header, username = res
    print(header, username)
    max_pid = get_max_range(cw)
    info = {}
    info['header'] = header
    info['username'] = username
    session = Session()
    urls = []
    ids = set()
    for p in range(100):
        url_api = urljoin(url_page, '/{}/{}/videos/best/{}'.format(header, username, p))
        print_(url_api)
        r = session.post(url_api)
        data = json.loads(r.text)
        videos = data.get('videos') #4530
        if not videos:
            print_('empty')
            break
        for video in videos:
            id_ = video['id']
            if id_ in ids:
                print_('duplicate: {}'.format(id_))
                continue
            ids.add(id_)
            info['name'] = video['pn']
            urls.append(urljoin(url_page, video['u']))
        if len(urls) >= max_pid:
            break
        n = data['nb_videos']
        s = '{} {} - {}'.format(tr_('읽는 중...'), info['name'], len(urls))  # '읽는 중...' = "Reading..."
        if cw:
            cw.setTitle(s)
        else:
            print(s)
        if len(ids) >= n:
            break
        sleep(1, cw)
    if not urls:
        raise Exception('no videos')
    info['urls'] = urls[:max_pid]
    return info
def get_session(url, cw=None):
    #res = clf2.solve(url, cw=cw)
    #return res['session']
    session = Session()
    sessionid = session.cookies._cookies.get('.instagram.com', {}).get('/', {}).get('sessionid')
    if sessionid is None or sessionid.is_expired():
        raise errors.LoginRequired()
    session.headers['User-Agent'] = downloader.hdr['User-Agent']
    if not session.cookies.get('csrftoken', domain='.instagram.com'):
        csrf_token = generate_csrf_token()
        print('csrf:', csrf_token)
        session.cookies.set("csrftoken", csrf_token, domain='.instagram.com')
    return session
def get(self, url):
    if self._url_video:
        return self._url_video
    cw = self.cw
    print_ = get_print(cw)
    html = downloader.read_html(url)
    soup = Soup(html)

    embedUrl = extract('embedUrl', html, cw)
    if embedUrl:
        raise EmbedUrlError('[pandoratv] EmbedUrl: {}'.format(embedUrl))

    uid = extract('strLocalChUserId', html, cw)
    pid = extract('nLocalPrgId', html, cw)
    fid = extract('strFid', html, cw)
    resolType = extract('strResolType', html, cw)
    resolArr = extract('strResolArr', html, cw)
    vodSvr = extract('nVodSvr', html, cw)
    resols = extract('nInfo', html, cw)
    runtime = extract('runtime', html, cw)

    url_api = 'http://www.pandora.tv/external/getExternalApi/getVodUrl/'
    data = {
        'userId': uid,
        'prgId': pid,
        'fid': fid,
        'resolType': resolType,
        'resolArr': ','.join(map(str, resolArr)),
        'vodSvr': vodSvr,
        'resol': max(resols),
        'runtime': runtime,
        'tvbox': 'false',
        'defResol': 'true',
        'embed': 'false',
    }
    session = Session()
    r = session.post(url_api, headers={'Referer': url}, data=data)
    data = json.loads(r.text)
    self._url_video = data['src']

    self.title = soup.find('meta', {'property': 'og:description'})['content']
    ext = get_ext(self._url_video)
    self.filename = format_filename(self.title, pid, ext)

    self.url_thumb = soup.find('meta', {'property': 'og:image'})['content']
    self.thumb = BytesIO()
    downloader.download(self.url_thumb, buffer=self.thumb)
    return self._url_video
def read_channel(url_page, cw=None):
    print_ = get_print(cw)
    res = re.find(CHANNEL_PATTERN, url_page)
    if res is None:
        raise Exception('Not channel')
    header, username = res
    print(header, username)
    max_pid = get_max_range(cw, 2000)
    info = {}
    info['header'] = header
    info['username'] = username
    session = Session()
    urls = []
    urls_set = set()
    for p in range(100):
        url_api = urljoin(url_page, '/{}/{}/videos/best/{}'.format(header, username, p))
        print(url_api)
        r = session.post(url_api, data='main_cats=false')
        soup = Soup(r.text)
        thumbs = soup.findAll('div', class_='thumb-block')
        if not thumbs:
            print_('empty')
            break
        for thumb in thumbs:
            info['name'] = thumb.find('span', class_='name').text.strip()
            href = thumb.find('a')['href']
            href = urljoin(url_page, href)
            if href in urls_set:
                print_('duplicate: {}'.format(href))
                continue
            urls_set.add(href)
            urls.append(href)
        if len(urls) >= max_pid:
            break
        s = '{} {} - {}'.format(tr_('읽는 중...'), info['name'], len(urls))  # '읽는 중...' = "Reading..."
        if cw:
            if not cw.alive:
                return
            cw.setTitle(s)
        else:
            print(s)
    if not urls:
        raise Exception('no videos')
    info['urls'] = urls[:max_pid]
    return info
def init(self):
    self.url = clean_url(self.url)
    self.session = Session()
    if re.search(PATTERN_ID, self.url): #1799
        select = self.soup.find('select', class_='bookselect')
        for i, op in enumerate(select.findAll('option')[::-1]):
            if 'selected' in op.attrs:
                break
        else:
            raise Exception('no selected option')
        for a in self.soup.findAll('a'):
            url = urljoin(self.url, a.get('href') or '')
            if re.search(PATTERN, url):
                break
        else:
            raise Exception('list not found')
        self.url = self.fix_url(url)
        self._soup = None
        for i, page in enumerate(get_pages(self.url, self.session, self.soup)):
            if page.id == int(op['value']):
                break
        else:
            raise Exception('can not find page')
        self.cw.range_p = [i]
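# init above relies on Python's for/else three times: the else suite runs
# only when the loop finishes without hitting break. Standalone example:
options = [{'value': '1'}, {'value': '2', 'selected': ''}]
for op in options:
    if 'selected' in op:
        break
else:
    raise Exception('no selected option')  # only if no option matched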
def get_session():
    session = Session()
    session.cookies.set(name='over18', value='yes', path='/', domain='.syosetu.com')
    return session
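# Usage sketch for get_session above (assumes Session behaves like
# requests.Session; the URL is illustrative): every request to
# *.syosetu.com then carries over18=yes, skipping the age-confirmation page.
session = get_session()
r = session.get('https://ncode.syosetu.com/')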
def account(self, update, context):
    session = Session.get_from(context.user_data)
    accounts = session.datasource.account()
    reply_markup = None
    if isinstance(accounts, dict) and accounts.get('Success') and len(accounts.get('Data', [])):
        msg = 'Select the account'
        keyboard = []
        keyboardline = []
        for row in accounts.get('Data', []):
            if len(keyboardline) <= 1:
                keyboardline.append(
                    InlineKeyboardButton(row.get('name'),
                                         callback_data='account_{}'.format(row.get('id'))))
            if len(keyboardline) == 2:
                keyboard.append(keyboardline)
                keyboardline = []
        if len(keyboardline):
            keyboard.append(keyboardline)
        if len(keyboard):
            reply_markup = InlineKeyboardMarkup(keyboard)
        Log.info(accounts)
    else:
        Log.error(accounts)
        msg = "Could not get accounts, try later /done"
    self.base_handler.reply_text(update, context, text=msg, reply_markup=reply_markup)
    return self.WELCOME
def init(self):
    self.url = self.url.replace('lhscan.net', 'loveheaven.net')
    self.session = Session()
    #clf2.solve(self.url, session=self.session, cw=self.cw)
    soup = self.soup
    if not soup.find('ul', class_='manga-info'):
        self.Invalid(u'{}: {}'.format(tr_(u'목록 주소를 입력해주세요'), self.url))  # "Please enter the list URL"
def soup(self):
    if self._soup is None:
        self.session = Session()
        self._soup = get_soup(self.url, session=self.session,
                              cw=self.customWidget)
    return self._soup
def get_soup(url, session=None):
    if session is None:
        session = Session()
    res = clf2.solve(url, session=session)
    soup = Soup(res['html'], apply_css=True)
    return session, soup, res['url']
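# get_soup above returns the session alongside the parsed page so the
# solved clearance cookies can be reused later; a typical call site
# (the URL is illustrative):
session, soup, final_url = get_soup('https://example.com/gallery/1')
title = soup.find('title')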
def get_id(url, cw=None):
    print_ = get_print(cw)
    url = url.split('?')[0].split('#')[0]

    if '/artwork/' in url:
        id_art = get_id_art(url)
        imgs = get_imgs_page(id_art, session=Session(), cw=cw)
        return imgs[0].data['user']['username']

    if '.artstation.' in url and 'www.artstation.' not in url:
        id = url.split('.artstation')[0].split('//')[-1]
        type = None
    elif 'artstation.com' in url:
        paths = url.split('artstation.com/')[1].split('/')
        id = paths[0]
        type = paths[1] if len(paths) > 1 else None
    else:
        id = url.replace('artstation_', '').replace('/', '/')
        type = None

    if type not in [None, 'likes']:
        type = None
    print_('type: {}, id: {}'.format(type, id))
    if type:
        return '{}/{}'.format(id, type)
    return id
def init(self):
    if u'bdsmlr.com/post/' in self.url:
        raise errors.Invalid(
            tr_(u'개별 다운로드는 지원하지 않습니다: {}').format(self.url))  # "Individual downloads are not supported"
    self.url = 'https://{}.bdsmlr.com'.format(self.id_)
    self.session = Session()
    clf2.solve(self.url, session=self.session, cw=self.cw)
def get_soup_session(url, cw=None):
    print_ = get_print(cw)
    session = Session()
    res = clf2.solve(url, session=session, cw=cw)
    print_('{} -> {}'.format(url, res['url']))
    if res['url'].rstrip('/') == 'https://welovemanga.one':
        raise errors.LoginRequired()
    return Soup(res['html']), session
def read(self):
    session = Session()
    video = get_video(self.url, session, self.cw)
    self.urls.append(video.url)
    self.setIcon(video.thumb)
    self.title = video.title
def main():
    stock_list = get_duanxianbao_b10()
    get_config_func = make_config(CONFIG_FILE)
    ret = predict(ticks=stock_list, look_back=519, index=False,
                  get_config_func=get_config_func)
    ret = ret.sort_values(by='dt')
    ret.loc[:, 'st'] = 'b10'
    print(ret)
    engines = [engine_company_local, engine_company_outer]
    for engine in engines:
        Base.metadata.bind = engine
        # Base.metadata.create_all(engine)
        Session.configure(bind=engine)
        session = Session()
        Duanxianbao().to_sql(session, ret)
def init(self):
    self.url_main = 'https://www.artstation.com/{}'.format(
        self.id.replace('artstation_', '', 1).replace('/', '/'))
    if '/artwork/' in self.url:
        pass #raise NotImplementedError('Single post')
    else:
        self.url = self.url_main
    self.session = Session()
def init(self):
    self.url = self.url.replace('bdsmlr_', '')
    if u'bdsmlr.com/post/' in self.url:
        return self.Invalid(
            tr_(u'개별 다운로드는 지원하지 않습니다: {}').format(self.url),  # "Individual downloads are not supported"
            fail=False)
    self.url = 'https://{}.bdsmlr.com'.format(self.id_)
    self.session = Session()
    clf2.solve(self.url, session=self.session, cw=self.customWidget)
def confirm(self, update, context):
    session = Session.get_from(context.user_data)
    try:
        oper = self.operation(update, context)
        if oper == 'unidefined':
            raise Exception(
                "First define what operation you want to do /income or /expense")
        amount = self.base_handler.get_data(update, context,
                                            self.base_handler.HWH_AMOUNT, 0)
        if not amount:
            raise Exception(
                "Please run the command /income or /expense and capture the amount")
        product_id = self.base_handler.get_data(
            update, context, self.base_handler.HWH_PRODUCT_ID, False)
        description = self.base_handler.get_data(
            update, context, self.base_handler.HWH_DESCRIPTION, False)
        account_id = self.base_handler.get_data(
            update, context, self.base_handler.HWH_ACCOUNT_ID, False)
        req = None
        if oper == 'income':
            req = session.datasource.income(product_id, description, amount,
                                            account_journal_id=account_id)
        elif oper == 'expense':
            req = session.datasource.expense(product_id, description, amount,
                                             account_journal_id=account_id)
        if req and isinstance(req, dict) and req.get('Success'):
            msg = "The {} has been recorded\n\n"\
                  "{}\n"\
                  "Record another /income or /expense or get /help".format(
                      oper,
                      self.state_text(update, context),
                  )
            self._reset(update, context)
            Log.info(req)
        else:
            Log.error(req)
            raise Exception("Failed to register the %s, try later /done" % oper)
    except Exception as e:
        Log.error(e)
        msg = str(e)
    self.base_handler.reply_text(update, context, text=msg,
                                 parse_mode='MarkdownV2')
    return self.WELCOME
async def get_current_user(self) -> Session:
    """Instantiate the session and fetch the user's session data.

    Override this method if you do not use the built-in session
    implementation.
    """
    if not self.get_secure_cookie('session_id'):
        self.set_secure_cookie('session_id', uuid4().hex)
    self.Session = Session.Session(self)
    await self.Session.get_data()
    return self.Session.data
def save_typing(self, update, context):
    session = Session.get_from(context.user_data)
    if session.sign_handler and self.__class__.__name__ == 'SignHandler':
        return session.sign_handler.save_typing(update, context)
    else:
        sign_ask_for = self.base_handler.get_data(
            update, context, self.base_handler.SIGNIN_ASK_FOR, None)
        self.base_handler.set_data(update, context, sign_ask_for,
                                   update.message.text)
        return self.workflow_connector(update, context)
def read(self):
    format = compatstr(self.ui_setting.youtubeFormat.currentText()).lower().strip()
    session = Session()
    video = get_video(self.url, session, format)
    self.urls.append(video.url)
    self.setIcon(video.thumb)
    self.title = video.title
def workflow_connector(self, update, context):
    connector = update.callback_query.data
    Session.del_from(context.user_data)
    session = Session.get_from(context.user_data)
    if connector == str(self.CALLBACK_ODOO_CONNECTOR):
        session.sign_handler = OdooSignHandler(self)
        session.hey_wallet_handler = OdooHeyWalletHandler(self)
        session.datasource = Odoo()
    else:
        session.sign_handler = None
        session.hey_wallet_handler = None
        session.datasource = None
    Session.set_from(context.user_data, session)
    if session.sign_handler:
        return session.sign_handler.workflow_connector(update, context)
    return self.sign_handler.workflow_connector(update, context)
def ask_for(self, update, context):
    session = Session.get_from(context.user_data)
    if session.sign_handler and self.__class__.__name__ == 'SignHandler':
        return session.sign_handler.ask_for(update, context)
    else:
        data = update.callback_query.data
        self.base_handler.set_data(update, context,
                                   self.base_handler.SIGNIN_ASK_FOR, data)
        text = 'Okay, tell me your {}'.format(data)
        self.base_handler.reply_text(update, context, text=text)
        return self.base_handler.STATE_TYPING_SIGNIN
def real_url(url, session=None, cw=None):
    print_ = get_print(cw)
    if session is None:
        session = Session()
    data = clf2.solve(url, session=session, cw=cw)
    url_new = data['url']
    print('url_new:', url_new)
    if url_new != url:
        url_new = urljoin(url_new, '/' + u'/'.join(url.split('/')[3:]))
        # print_(u'[redirect domain] {} -> {}'.format(url, url_new))
    return url_new
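# The urljoin line in real_url keeps the redirected domain but restores
# the original path. A self-contained illustration (URLs are made up):
from urllib.parse import urljoin

url = 'https://old.example.com/gallery/123'
url_new = 'https://new.example.com/'  # where the redirect landed
rebuilt = urljoin(url_new, '/' + '/'.join(url.split('/')[3:]))
assert rebuilt == 'https://new.example.com/gallery/123'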
def init(self):
    cw = self.cw
    self.session = Session()
    res = clf2.solve(self.url, self.session, cw)
    soup = Soup(res['html'])
    if is_captcha(soup):
        def f(html):
            return not is_captcha(Soup(html))
        clf2.solve(self.url, self.session, cw, show=True, f=f)
def set_session(self, user):
    sess_id = str(uuid4())
    current_session = Session(username=user, sess_id=sess_id,
                              created=datetime.now())
    session_cookie = Cookie.SimpleCookie()
    session_cookie['session_id'] = sess_id
    session_cookie['session_id']['path'] = "/"
    # set the session
    self.sessions[sess_id] = current_session
    self.default_header.append(
        ('Set-Cookie', session_cookie.output(header='').strip()))
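# How the Set-Cookie header in set_session is produced; a standalone
# sketch using http.cookies (the Python 3 name of the Cookie module):
from http.cookies import SimpleCookie

cookie = SimpleCookie()
cookie['session_id'] = 'abc123'
cookie['session_id']['path'] = '/'
# output(header='') returns ' session_id=abc123; Path=/', hence the strip()
header = ('Set-Cookie', cookie.output(header='').strip())
print(header)  # ('Set-Cookie', 'session_id=abc123; Path=/')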
    EquityPrediction)
import json
import os
import logging
import pickle

import numpy as np

fmt = '%(asctime)s - %(filename)s:%(lineno)s - %(name)s - %(message)s'
logging.basicConfig(format=fmt)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

CONFIG_FILE = 'predict_all_stock_feature5.csv'

if __name__ == '__main__':
    all_stocks = pd.read_csv(CONFIG_FILE, index_col=0,
                             dtype={'tick': object}).tick.unique().tolist()
    get_config_func = make_config(CONFIG_FILE)
    ret = predict(ticks=all_stocks, look_back=519, index=False,
                  get_config_func=get_config_func)
    ret = ret.sort_values(by='dt')
    print(ret)
    engines = [engine_company_local, engine_company_outer]
    for engine in engines:
        Base.metadata.bind = engine
        # Base.metadata.create_all(engine)
        Session.configure(bind=engine)
        session = Session()
        EquityPrediction().to_sql(session, ret)
def main():
    from data.const import get_trade_calendar
    from datetime import time
    trade_cal = get_trade_calendar()

    def transform_tick(tick):
        ret = None
        if tick in ['000001', '000300']:
            ret = ''.join((tick, '_SH_IX'))
        else:
            ret = ''.join((tick, '_SZ_IX'))
        return ret

    today = str(datetime.now().date())
    today_idx = trade_cal.index.searchsorted(today)
    yesterday = str(trade_cal.index[today_idx - 1].date())
    tomorrow = str(trade_cal.index[today_idx + 1].date())
    from_date, to_date = None, None
    if datetime.now().time() > time(15, 10, 0, 0):  # past 15:10
        from_date, to_date = today, tomorrow
    else:
        from_date, to_date = yesterday, today
    # tomorrow = str(trade_cal.index[today_idx + 1].date())

    index_list = ['000001', '000300', '399001', '399005', '399006']
    # index_list = ['000001']
    # today = str(datetime.now().date())
    ret = []
    for idx in index_list:
        ret.append(predict_history(idx, from_date=from_date, to_date=to_date,
                                   is_index=True,
                                   data_source=['mysql', 'local'],
                                   config_params=config_params))
    df = pd.concat(ret)
    # df.index = df.index.format()
    df.reset_index(inplace=True)
    cols_to_rename = {'dates': 'dt', 'index': 'dt', 'prediction': 'pred',
                      'facts': 'fact', 'tick': 'secu'}
    df.rename(columns=cols_to_rename, inplace=True)
    logger.debug('df in predict local is \n{}'.format(df))
    df.dt = df.dt.apply(lambda x: str(x.date()))
    df.secu = df.secu.apply(transform_tick)
    df = df.where(pd.notnull(df), None)
    df = df.loc[:, ['dt', 'secu', 'pred', 'fact']]
    print(df.sort_values(by=['dt', 'secu']))

    # Session.configure(bind=engine_phil_machine_test)
    engines = [
        # engine_phil_machine_test,
        engine_phil_machine,
        # engine_company_local,
        # engine_company_outer,
    ]
    for engine in engines:
        Base.metadata.bind = engine
        Base.metadata.create_all(engine)
        Session.configure(bind=engine)
        session = Session()
        IndexPrediction().to_sql(session, df)