def gen_prob_time_by_username_fine():
    # same as "time_feat.gen_time_by_username.npz" in initial_analysis
    enr_df = utils.load_enroll()
    df = utils.load_log()

    min_date = utils.to_seconds(df['time'].min())
    df = df[df['event'] == 'problem']
    df = df.sort_values('time')  # DataFrame.sort() was removed in pandas 0.20

    feat = []
    for username, group in df.groupby('username'):
        times = sorted(group['time'].tolist())
        feat.append({
            'username': username,
            'first_time': utils.to_seconds(times[0]) - min_date,
            'last_time': utils.to_seconds(times[-1]) - min_date,
        })

    feat = pd.DataFrame(feat)
    enr_df = enr_df.merge(feat, how='left', on='username')
    # users with no "problem" events get the sentinel -1
    enr_df['first_time'] = enr_df['first_time'].fillna(-1)
    enr_df['last_time'] = enr_df['last_time'].fillna(-1)
    return {
        'first': utils.reshape(enr_df['first_time']),
        'last': utils.reshape(enr_df['last_time']),
    }
def gen_prob_time_by_enrollment_fine():
    # same as "time_feat.gen_first_time.npz" in initial_analysis
    enr_df = utils.load_enroll()
    df = utils.load_log()

    # earliest log timestamp per course, in epoch seconds
    dx = df.groupby('course_id').agg({'time': 'min'}).reset_index()
    course_min_time = {
        row['course_id']: utils.to_seconds(row['time'])
        for _, row in dx.iterrows()
    }

    df = df.sort_values('time')
    df = df[df['event'] == 'problem']

    feat = []
    for enrollment_id, group in df.groupby('enrollment_id'):
        times = sorted(group['time'].tolist())
        course_id = group['course_id'].iloc[0]
        min_time = course_min_time[course_id]
        feat.append({
            'enrollment_id': enrollment_id,
            'first_time': utils.to_seconds(times[0]) - min_time,
            'last_time': utils.to_seconds(times[-1]) - min_time,
        })

    feat = pd.DataFrame(feat)
    enr_df = enr_df.merge(feat, how='left', on='enrollment_id')
    enr_df['first_time'] = enr_df['first_time'].fillna(-1)
    enr_df['last_time'] = enr_df['last_time'].fillna(-1)
    return {
        'first': utils.reshape(enr_df['first_time']),
        'last': utils.reshape(enr_df['last_time']),
    }
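# The two generators above lean on a few project helpers that are not shown
# in this file. A minimal sketch of plausible implementations, assuming log
# timestamps parse with pandas and features are stacked column-wise; the
# real utils module may differ.
import numpy as np
import pandas as pd

def to_seconds(t):
    # Epoch seconds from a timestamp string or pandas Timestamp.
    return pd.Timestamp(t).value // 10**9

def reshape(series):
    # (n, 1) column vector, so single features hstack cleanly.
    return np.asarray(series).reshape(-1, 1)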
def gen_user_event_last_time():
    enr_df = utils.load_enroll()
    df = utils.load_log()

    min_date = utils.to_seconds(df['time'].min())
    df['course_id_x_event'] = df['course_id'] + 'x' + df['event']
    df = df.sort_values('time')

    feat = []
    for (username, course_event), group in df.groupby(
            ['username', 'course_id_x_event']):
        times = sorted(group['time'].tolist())
        feat.append({
            'username': username,
            'course_id_x_event': course_event,
            'last_time': utils.to_seconds(times[-1]) - min_date,
        })

    feat = pd.DataFrame(feat)
    # one column per (course_id, event) pair, renamed to 0..colsz-1
    featp = feat.pivot_table(values='last_time', index='username',
                             columns='course_id_x_event').reset_index()
    colsz = len(featp.columns) - 1
    featp.columns = ['username'] + list(range(colsz))
    enr_df = enr_df.merge(featp, how='left', on='username')
    enr_df.fillna(-1, inplace=True)
    return {'X': np.array(enr_df[list(range(colsz))])}
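# The per-group loop in gen_user_event_last_time is easy to vectorize. A
# sketch of the same feature built with a single pivot_table; it assumes
# utils.to_seconds can be mapped over the 'time' column.
def gen_user_event_last_time_vectorized(df, enr_df, min_date):
    df = df.copy()
    df['course_id_x_event'] = df['course_id'] + 'x' + df['event']
    df['secs'] = df['time'].map(utils.to_seconds) - min_date
    featp = (df.pivot_table(values='secs', index='username',
                            columns='course_id_x_event', aggfunc='max')
               .reset_index())
    colsz = len(featp.columns) - 1
    featp.columns = ['username'] + list(range(colsz))
    enr_df = enr_df.merge(featp, how='left', on='username').fillna(-1)
    return {'X': np.array(enr_df[list(range(colsz))])}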
def gen_prob_first_last_in_judgement_time():
    enr_df = utils.load_enroll()
    df = utils.load_log()
    df = df[df['event'] == 'problem']

    # evaluation window per course, anchored on the last "problem" event
    df_by_course = df.groupby('course_id').agg({'time': 'max'}).reset_index()
    course_evaluation_period = {
        row['course_id']: utils.to_evaluation_period(row['time'], days=1)
        for _, row in df_by_course.iterrows()
    }
    course_df = {
        course_id: df[
            (df['time'] >= period['begin']) &
            (df['time'] <= period['end'])
        ]
        for course_id, period in course_evaluation_period.items()
    }

    feat = []
    df = df.sort_values('time')
    for i, ((username, course_id), _) in enumerate(
            df.groupby(['username', 'course_id'])):
        if i % 100 == 0:
            l.info("{0} of 200k".format(i))
        d = course_df[course_id][course_df[course_id]['username'] == username]
        first_time = -1 if len(d) == 0 else utils.to_seconds(d['time'].min())
        last_time = -1 if len(d) == 0 else utils.to_seconds(d['time'].max())
        feat.append({
            'username': username,
            'course_id': course_id,
            'first_time': first_time,
            'last_time': last_time,
        })

    feat = pd.DataFrame(feat)
    enr_df = enr_df.merge(feat, how='left', on=['username', 'course_id'])
    enr_df.fillna(-1, inplace=True)
    return {
        'first_time': utils.reshape(enr_df['first_time']),
        'last_time': utils.reshape(enr_df['last_time']),
    }
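# The "same as ...npz" comments above suggest each generator's dict of
# column arrays is persisted with numpy.savez and later hstacked into a
# design matrix. A sketch of that assumed workflow:
import numpy as np

feat = gen_prob_time_by_username_fine()
np.savez('time_feat.gen_time_by_username.npz', **feat)

loaded = np.load('time_feat.gen_time_by_username.npz')
X = np.hstack([loaded['first'], loaded['last']])  # shape (n_enrollments, 2)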
def serve_peaks():
    args = request.args
    (start_dates, last_quiet_dates, end_dates,
     start_prices, last_quiet_prices, end_prices) = pumps.find_pumps_easy(
        args['s'],
        orig_dir="data/securities",
        cache_dir="data/securities/cached",
        min_quiet_days=int(args['min_quiet_days']),
        quiet_tol=float(args['quiet_tol']),
        min_growth_days=int(args['min_growth_days']),
        max_growth_days=int(args['max_growth_days']),
        growth_tol=float(args['growth_tol']),
        silent=True,
    )

    conv = lambda x: utils.to_seconds(pd.to_datetime(x))
    lo, hi = conv(config.date_range[0]), conv(config.date_range[1])
    res = {
        'results': sorted(
            [{'start': s, 'end': e}
             for s, e in zip(sorted(map(utils.to_seconds, start_dates)),
                             sorted(map(utils.to_seconds, end_dates)))
             if s and lo < s < hi],
            key=lambda d: d['start'],  # dicts themselves are not orderable
        )
    }
    return jsonify(res)
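# Hypothetical wiring for serve_peaks; the route name is an assumption,
# only the query parameters are confirmed by request.args above.
from flask import Flask
app = Flask(__name__)
app.add_url_rule('/peaks', 'serve_peaks', serve_peaks)

# GET /peaks?s=AAPL&min_quiet_days=30&quiet_tol=0.05
#           &min_growth_days=3&max_growth_days=10&growth_tol=0.5
# responds with {"results": [{"start": ..., "end": ...}, ...]},
# start/end given in epoch seconds.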
def run(self):
    """Visit the video self.opts.visits times, watching a random slice
    of it on each pass."""
    count = 1
    while count <= self.opts.visits:
        youtube = YouTube(url=self.opts.url, proxy=self.opts.proxy,
                          verbose=self.opts.verbose)
        youtube.get_url()
        title = youtube.get_title()
        if self.opts.visits > 1 and title:
            length = len(title) + 4 - len(str(count))
            print('[{0}] {1}'.format(count, '-' * length))
        ip_address = utils.get_ipaddr(proxy=self.opts.proxy)
        if ip_address:
            print('external IP address:', ip_address)
        if title:
            print('title:', title)
        youtube.play_video()
        youtube.get_views()
        video_duration = youtube.time_duration()
        if video_duration:
            print('video duration time:', video_duration)
            seconds = utils.to_seconds(duration=video_duration.split(':'))
            if seconds:
                # watch a random portion of the video before disconnecting
                sleep_time = randrange(seconds)
                if self.opts.verbose:
                    print('video duration time in seconds:', seconds)
                    print('stopping video in %s seconds' % sleep_time)
                time.sleep(sleep_time)
        youtube.disconnect()
        count += 1
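# Sketch of the duration flavour of utils.to_seconds used above; it
# receives the parts of an H:M:S (or M:S) string already split on ':'.
# This is an assumed implementation, not the project's actual helper.
def to_seconds(duration):
    total = 0
    for part in duration:              # e.g. ['3', '42'] for "3:42"
        total = total * 60 + int(part)
    return total

assert to_seconds('3:42'.split(':')) == 222
assert to_seconds('1:00:05'.split(':')) == 3605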
def to_dicts(candidates):
    """Convert find_pumps results (a tuple of 6 tuples) into a dict of
    parallel lists, with dates converted to epoch seconds."""
    import utils

    sds, lqds, eds, sps, lqps, eps = candidates
    return {
        'start': [utils.to_seconds(s) for s in sds],
        'end': [utils.to_seconds(e) for e in eds],
        'last_quiet_date': lqds,
        'start_prices': sps,
        'last_quiet_price': lqps,
        'end_price': eps,
    }
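# Example use of to_dicts with the 6-tuple shape find_pumps returns;
# the values are illustrative only.
candidates = (('2014-01-02',),                # start dates
              ('2014-01-01',),                # last quiet dates
              ('2014-01-09',),                # end dates
              (0.10,), (0.09,), (0.55,))      # start / last-quiet / end prices
d = to_dicts(candidates)
# d['start'] and d['end'] are epoch seconds; the price and quiet-date
# lists pass through unchanged.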
def create_objects(cls, symbol, df, securities):
    descr_box = Paragraph(text='content loading...')
    btn_close_loading = Button(label='Close Loading')
    dialog_loading = Dialog(
        title='loading', content=vplot(descr_box), name='loading_dialog',
        buttons=[btn_close_loading], visible=False)

    source_data = dict(df)
    main_source = ColumnDataSource(dict(df))
    source = ColumnDataSource(source_data)

    # cached results of the P&D algorithm computed with the "default"
    # configuration
    stats = utils.get_symbols_cached_stats()[symbol]
    intervals = pd.DataFrame(stats)
    intervals['bottom'] = [0] * len(intervals['start'])
    intervals['values'] = [max(df['price'])] * len(intervals['start'])

    # keep only intervals that start inside the configured date range;
    # both sides of the comparison must stay as datetimes to be valid
    starts = pd.to_datetime(intervals['start'])
    intervals = intervals[
        (starts > pd.to_datetime(config.date_range[0])) &
        (starts < pd.to_datetime(config.date_range[1]))
    ]

    # Create P&D intervals DataSource
    intervals_source = ColumnDataSource(intervals)
    source.tags = ['main_source']

    trends = utils.load_trends_data(symbol, start_date=min(df['dt']))
    trends_source = ColumnDataSource(trends)

    trades = Slider(title="trades", name='trades',
                    value=0, start=0, end=124, step=1)

    # Selectors
    symbol = Select.create(options=securities, value=symbol,
                           name='symbol', title="")
    window_selector = Select.create(options=['---'], name='period_selector',
                                    title="Search intervals with:")
    symbol_filter = Select.create(
        options=['All', 'Stocks with Spam', 'Stocks without Spam'],
        name='symbol_filter', title="Filter Symbols:",
        value='Stocks with Spam')
    callback = Callback(
        args={'symbol_filter': symbol_filter,
              'dialog_loading': dialog_loading},
        code=callbacks.symbol_filter)
    symbol_filter.callback = callback

    btn_detect_pumps = Button(label='Configure P&D Detection',
                              name='config_pumps')

    main_tab = Panel(title="Main")
    tabs = Tabs()

    # Create the stocks table
    ranks = utils.get_pumps_rank()
    # quotient_metrics = utils.get_quotient_metrics()
    # ranks['quotient'] = quotient_metrics['quotient']
    ranks['spams'] = [utils.spams_count.get(s, 0) for s in ranks['symbol']]
    ranks = ranks.sort_values(['spams', 'vol_quotient'], ascending=False)
    cls._pre_filtered_ranks = {
        'All': {k: ranks[k] for k in ranks.columns},
        'Stocks with Spam': dict(
            ranks[ranks['spams'] > 0]
            .sort_values('vol_quotient', ascending=False)),
        'Stocks without Spam': dict(
            ranks[ranks['spams'] == 0]
            .sort_values('vol_quotient', ascending=False)),
    }

    source_stocks_rank = ColumnDataSource(cls._pre_filtered_ranks['All'])
    table_stocks_rank = DataTable(
        source=source_stocks_rank, width=560, height=450,
        selectable=True, editable=True,
        columns=[
            TableColumn(field='symbol', title='symbol', width=130,
                        editor=StringEditor()),
            TableColumn(field='vol_quotient', title='volume ratio',
                        editor=StringEditor(), default_sort='descending'),
            TableColumn(field='risk_score', title='risk', width=100,
                        editor=StringEditor(), default_sort='descending'),
            TableColumn(field='spams', title='spams', width=130,
                        editor=StringEditor(), default_sort='descending'),
        ])
    callback = Callback(
        args={'tr': table_stocks_rank, 'sr': source_stocks_rank,
              'symb': symbol, 'dialog_loading': dialog_loading},
        code=callbacks.source_stocks_rank)
    source_stocks_rank.callback = callback

    return locals()
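# Dialog, vplot, Callback and Select.create above come from a legacy Bokeh
# release (the 0.9/0.10 era). A minimal sketch of the symbol filter on the
# current API, where CustomJS replaces Callback; this is an illustration,
# not a drop-in port of the code above.
from bokeh.models import CustomJS, Select

symbol_filter = Select(
    options=['All', 'Stocks with Spam', 'Stocks without Spam'],
    value='Stocks with Spam', title="Filter Symbols:")
symbol_filter.js_on_change('value', CustomJS(
    args={},  # pass whatever sources/widgets the JS snippet needs
    code="console.log('filter changed to ' + cb_obj.value)"))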