コード例 #1
0
def gen_prob_time_by_username_fine():
    """Per-username first/last 'problem' event times, in seconds relative to
    the earliest timestamp in the whole log.

    Returns a dict with 'first' and 'last' arrays aligned to the enrollment
    table; users with no 'problem' events get -1.
    """
    # same as "time_feat.gen_time_by_username.npz" in initial_analysis
    enr_df = utils.load_enroll()
    df = utils.load_log()
    # Global reference point: earliest event of ANY type (computed before the
    # 'problem' filter, matching the original feature definition).
    min_date = utils.to_seconds(df['time'].min())
    df = df[df['event'] == 'problem']

    feat = []
    # DataFrame.sort() was removed in pandas 0.20; sort_values() is the
    # supported equivalent.
    df = df.sort_values('time')
    for username, rows in df.groupby('username'):
        times = sorted(rows['time'].tolist())
        feat.append({
            'username': username,
            'first_time': utils.to_seconds(times[0]) - min_date,
            'last_time': utils.to_seconds(times[-1]) - min_date,
        })

    feat = pd.DataFrame(feat)
    enr_df = enr_df.merge(feat, how='left', on='username')
    # -1 marks enrollments whose user never produced a 'problem' event.
    enr_df['first_time'] = enr_df['first_time'].fillna(-1)
    enr_df['last_time'] = enr_df['last_time'].fillna(-1)

    return {
        'first': utils.reshape(enr_df['first_time']),
        'last': utils.reshape(enr_df['last_time']),
    }
コード例 #2
0
def gen_prob_time_by_enrollment_fine():
    """Per-enrollment first/last 'problem' event times, in seconds relative
    to the earliest event of the enrollment's course.

    Returns a dict with 'first' and 'last' arrays aligned to the enrollment
    table; enrollments without 'problem' events get -1.
    """
    # same as "time_feat.gen_first_time.npz" in initial_analysis
    enr_df = utils.load_enroll()

    df = utils.load_log()
    # Per-course reference point: earliest event of ANY type in that course
    # (computed before the 'problem' filter, matching the original feature).
    dx = df.groupby('course_id').agg({'time': 'min'}).reset_index()
    course_min_time = {
        row['course_id']: utils.to_seconds(row['time'])
        for _, row in dx.iterrows()
    }

    feat = []
    # DataFrame.sort() was removed in pandas 0.20; sort_values() is the
    # supported equivalent.
    df = df.sort_values('time')
    df = df[df['event'] == 'problem']
    for enrollment_id, rows in df.groupby('enrollment_id'):
        times = sorted(rows['time'].tolist())
        # All rows of one enrollment share a course; take it from the first.
        course_id = rows['course_id'].iloc[0]
        min_time = course_min_time[course_id]
        feat.append({
            'enrollment_id': enrollment_id,
            'first_time': utils.to_seconds(times[0]) - min_time,
            'last_time': utils.to_seconds(times[-1]) - min_time,
        })

    feat = pd.DataFrame(feat)
    enr_df = enr_df.merge(feat, how='left', on='enrollment_id')
    # -1 marks enrollments with no 'problem' events at all.
    enr_df['first_time'] = enr_df['first_time'].fillna(-1)
    enr_df['last_time'] = enr_df['last_time'].fillna(-1)

    return {
        'first': utils.reshape(enr_df['first_time']),
        'last': utils.reshape(enr_df['last_time']),
    }
コード例 #3
0
def gen_user_event_last_time():
    """Last event time per (username, course_id x event) combination,
    pivoted into a feature matrix aligned to the enrollment table.

    Times are seconds relative to the earliest log timestamp; combinations
    that never occurred are filled with -1.
    """
    enr_df = utils.load_enroll()
    df = utils.load_log()
    min_date = utils.to_seconds(df['time'].min())
    # Composite key so one pivot column exists per (course, event type) pair.
    df['course_id_x_event'] = df['course_id'] + 'x' + df['event']

    feat = []
    # DataFrame.sort() was removed in pandas 0.20; use sort_values().
    df = df.sort_values('time')
    for (username, key), rows in df.groupby(['username', 'course_id_x_event']):
        times = sorted(rows['time'].tolist())
        feat.append({
            'username': username,
            'course_id_x_event': key,
            'last_time': utils.to_seconds(times[-1]) - min_date,
        })

    feat = pd.DataFrame(feat)
    featp = feat.pivot_table(values='last_time',
                             index='username',
                             columns='course_id_x_event').reset_index()
    # Rename pivot columns to 0..colsz-1 so they can be selected uniformly.
    colsz = len(featp.columns) - 1
    featp.columns = ['username'] + list(range(colsz))

    enr_df = enr_df.merge(featp, how='left', on='username')
    # -1 marks combinations the user never triggered.
    enr_df.fillna(-1, inplace=True)

    return {'X': np.array(enr_df[list(range(colsz))])}
コード例 #4
0
def gen_prob_first_last_in_judgement_time():
    """First/last 'problem' event times inside each course's evaluation
    window, per (username, course_id).

    The window per course is derived from that course's latest 'problem'
    timestamp via utils.to_evaluation_period(..., days=1). Returns absolute
    times in seconds; -1 when the user had no 'problem' event inside the
    window.
    """
    enr_df = utils.load_enroll()

    df = utils.load_log()
    df = df[df['event'] == 'problem']

    # Evaluation window per course, keyed by course_id.
    df_by_course = df.groupby('course_id').agg({'time': 'max'}).reset_index()
    course_evaluation_period = {
        row['course_id']: utils.to_evaluation_period(row['time'], days=1)
        for _, row in df_by_course.iterrows()
    }
    course_list = course_evaluation_period.keys()

    # Pre-slice the log to each course's window so the per-user loop below
    # only has to filter by username.
    course_df = {
        course_id: df[
            (df['time'] >= course_evaluation_period[course_id]['begin']) &
            (df['time'] <= course_evaluation_period[course_id]['end'])
        ]
        for course_id in course_list
    }

    feat = []
    # DataFrame.sort() was removed in pandas 0.20; use sort_values().
    df = df.sort_values('time')
    for i, ((username, course_id), _) in enumerate(
            df.groupby(['username', 'course_id'])):
        if i % 100 == 0:
            l.info("{0} of 200k".format(i))
        d = course_df[course_id][
            course_df[course_id]['username'] == username
        ]
        # Single emptiness test instead of the original's two len(d) checks.
        if len(d) == 0:
            first_time = -1
            last_time = -1
        else:
            first_time = utils.to_seconds(d['time'].min())
            last_time = utils.to_seconds(d['time'].max())

        feat.append({
            'username': username,
            'course_id': course_id,
            'last_time': last_time,
            'first_time': first_time,
        })

    feat = pd.DataFrame(feat)
    enr_df = enr_df.merge(feat, how='left', on=['username', 'course_id'])
    enr_df.fillna(-1, inplace=True)

    return {
        'first_time': utils.reshape(enr_df['first_time']),
        'last_time': utils.reshape(enr_df['last_time']),
    }
コード例 #5
0
ファイル: feat.py プロジェクト: ContinuumIO/xdata-feat
def serve_peaks():
    """Flask endpoint: run the pump detector for the requested security and
    return detected (start, end) intervals as JSON epoch-second pairs.

    Query params: s (symbol), min_quiet_days, quiet_tol, min_growth_days,
    max_growth_days, growth_tol.
    """
    args = request.args

    def _tol(raw):
        # Preserve the original parsing quirk: append a '.' when the value
        # has none (e.g. '5' -> '5.') before converting to float.
        return float(raw if '.' in raw else raw + '.')

    (start_dates, last_quiet_dates, end_dates,
     start_prices, last_quiet_prices, end_prices) = pumps.find_pumps_easy(
        args['s'],
        orig_dir="data/securities",
        cache_dir="data/securities/cached",
        min_quiet_days=int(args['min_quiet_days']),
        quiet_tol=_tol(args['quiet_tol']),
        min_growth_days=int(args['min_growth_days']),
        max_growth_days=int(args['max_growth_days']),
        growth_tol=_tol(args['growth_tol']),
        silent=True,
    )

    def conv(x):
        return utils.to_seconds(pd.to_datetime(x))

    # Hoist the date-range bounds: the original re-parsed and re-converted
    # both config dates for every candidate pair in the comprehension.
    lo = conv(config.date_range[0])
    hi = conv(config.date_range[1])

    # NOTE(review): starts and ends are sorted independently before zipping,
    # so a pair may not come from the same detected pump — kept as-is to
    # preserve the original output.
    pairs = zip(sorted(map(utils.to_seconds, start_dates)),
                sorted(map(utils.to_seconds, end_dates)))
    res = {
        'results': sorted(
            [{'start': s, 'end': e} for s, e in pairs if s and lo < s < hi]
        )
    }

    return jsonify(res)
コード例 #6
0
    def run(self):
        """Visit the configured URL self.opts.visits times.

        Each visit loads the page (optionally via proxy), plays the video,
        then sleeps a random number of seconds bounded by the video duration
        before disconnecting.
        """
        count = 1
        while count <= self.opts.visits:
            youtube = YouTube(url=self.opts.url,
                              proxy=self.opts.proxy,
                              verbose=self.opts.verbose)
            youtube.get_url()
            title = youtube.get_title()
            if self.opts.visits > 1 and title:
                # Separator sized so it lines up with the printed title.
                length = (len(title) + 4 - len(str(count)))
                print('[{0}] {1}'.format(count, '-' * length))
            ip_address = utils.get_ipaddr(proxy=self.opts.proxy)
            if ip_address:
                print('external IP address:', ip_address)
            if title:
                print('title:', title)
            youtube.play_video()
            youtube.get_views()
            video_duration = youtube.time_duration()
            seconds = None
            if video_duration:
                print('video duration time:', video_duration)
                # BUG FIX: the original called video_duration.split(':')
                # unconditionally, raising AttributeError whenever
                # time_duration() returned None.
                seconds = utils.to_seconds(duration=video_duration.split(':'))
            if seconds:
                sleep_time = randrange(seconds)
                if self.opts.verbose:
                    print('video duration time in seconds:', seconds)
                print('stopping video in %s seconds' % sleep_time)
                time.sleep(sleep_time)
            youtube.disconnect()
            count += 1
コード例 #7
0
ファイル: feat.py プロジェクト: vishalbelsare/xdata-feat
def serve_peaks():
    """Flask endpoint: detect pumps for the requested security and return
    the (start, end) intervals, as epoch seconds, in a JSON payload.

    Query params: s (symbol), min_quiet_days, quiet_tol, min_growth_days,
    max_growth_days, growth_tol.
    """
    args = request.args

    def _parse_tol(raw):
        # Keep the original quirk: values lacking a '.' get one appended
        # before float() conversion.
        return float(raw if '.' in raw else raw + '.')

    (start_dates, last_quiet_dates, end_dates, start_prices,
     last_quiet_prices, end_prices) = pumps.find_pumps_easy(
        args['s'],
        orig_dir="data/securities",
        cache_dir="data/securities/cached",
        min_quiet_days=int(args['min_quiet_days']),
        quiet_tol=_parse_tol(args['quiet_tol']),
        min_growth_days=int(args['min_growth_days']),
        max_growth_days=int(args['max_growth_days']),
        growth_tol=_parse_tol(args['growth_tol']),
        silent=True,
    )

    def conv(x):
        return utils.to_seconds(pd.to_datetime(x))

    # Hoist the range bounds out of the comprehension — the original parsed
    # and converted both config dates once per candidate pair.
    range_lo = conv(config.date_range[0])
    range_hi = conv(config.date_range[1])

    # NOTE(review): start and end dates are sorted independently before
    # pairing, so a pair need not belong to the same pump — preserved as-is.
    starts = sorted(map(utils.to_seconds, start_dates))
    ends = sorted(map(utils.to_seconds, end_dates))
    res = {
        'results':
        sorted([{
            'start': s,
            'end': e
        } for s, e in zip(starts, ends)
                if s and range_lo < s < range_hi])
    }

    return jsonify(res)
コード例 #8
0
ファイル: pumps.py プロジェクト: vishalbelsare/xdata-feat
def to_dicts(candidates):
    """Convert find_pumps results (a tuple of 6 parallel tuples) into a dict
    of parallel lists, with start/end dates converted to epoch seconds.

    Note: despite the original comment claiming "an array of dicts", the
    returned value is (and always was) a single dict of lists.
    """
    import utils
    sds, lqds, eds, sps, lqps, eps = candidates
    # The original also built a per-candidate list of dicts here and then
    # immediately overwrote it with the dict below; that dead computation
    # has been removed.
    return {
        'start': [utils.to_seconds(s) for s in sds],
        'end': [utils.to_seconds(e) for e in eds],
        'last_quiet_date': lqds,
        'start_prices': sps,
        'last_quiet_price': lqps,
        'end_price': eps,
    }
コード例 #9
0
ファイル: pumps.py プロジェクト: ContinuumIO/xdata-feat
def to_dicts(candidates):
  """Convert find_pumps results (a tuple of 6 parallel tuples) into a dict
  of parallel lists, with start/end dates converted to epoch seconds.

  Note: the original comment said "an array of dicts", but the function has
  always returned a single dict of lists.
  """
  import utils
  sds, lqds, eds, sps, lqps, eps = candidates
  # The original built a list-of-dicts 'res' here and immediately overwrote
  # it with the dict below; that dead computation has been removed.
  return {
    'start': [utils.to_seconds(s) for s in sds],
    'end': [utils.to_seconds(e) for e in eds],
    'last_quiet_date': lqds,
    'start_prices': sps,
    'last_quiet_price': lqps,
    'end_price': eps,
  }
コード例 #10
0
ファイル: dashboard.py プロジェクト: ContinuumIO/xdata-feat
    def create_objects(cls, symbol, df, securities):
        """Build and wire up all dashboard widgets and data sources for
        *symbol*, returning them as a dict via ``locals()``.

        NOTE(review): because the return value is ``locals()``, every local
        variable name in this method is part of the external interface —
        callers look objects up by these names. Do not rename locals.

        Parameters: *symbol* is the security to display (rebound below to a
        Select widget), *df* is its price data (uses 'price' and 'dt'
        columns), *securities* is the list of symbols for the selector.
        """
        descr_box = Paragraph(text='content loading...')

        # Loading modal, hidden by default; closed through its button.
        btn_close_loading = Button(label='Close Loading')
        dialog_loading = Dialog(
            title='loading', content=vplot(descr_box), name='loading_dialog',
            buttons=[btn_close_loading], visible=False)

        source_data = dict(df)
        main_source = ColumnDataSource(dict(df))
        source = ColumnDataSource(source_data)

        # TODO: REMOVE THIS COMMENTED CODE! IT'S JUST THE PREVIOUS
        # VERSION USED BEFORE NEW P&D Cached results and algorithm
        # get the cached results of the P&D algorithm computed with the
        # "default" configuration
        # intervals = utils.cached_pumps.get(symbol, pumps.to_dicts(((),(),(),(),(),())))
        # intervals['bottom'] = [0] * len(intervals['start'])
        # intervals['values'] = [max(df['price'])] * len(intervals['start'])
        #
        # intervals = pd.DataFrame(intervals)

        # new version
        stats = utils.get_symbols_cached_stats()[symbol]
        intervals = pd.DataFrame(stats)
        # Rectangle extents for drawing each interval over the price plot.
        intervals['bottom'] = [0] * len(intervals['start'])
        intervals['values'] = [max(df['price'])] * len(intervals['start'])

        # Parse a configured date string and convert it to epoch seconds.
        conv = lambda x: utils.to_seconds(pd.to_datetime(x))

        # Keep only intervals whose start falls inside the configured range.
        intervals = intervals[
            (pd.to_datetime(intervals['start']) > conv(config.date_range[0])) &
            (pd.to_datetime(intervals['start']) < conv(config.date_range[1]))
        ]

        # Create P&Ds intervals DataSource
        intervals_source = ColumnDataSource(intervals)
        source.tags = ['main_source']

        trends = utils.load_trends_data(symbol, start_date=min(df['dt']))
        trends_source = ColumnDataSource(trends)

        trades = Slider(
            title="trades", name='trades',
            value=0, start=0, end=124, step=1
        )

        # Selectors
        # NOTE: 'symbol' is rebound from the input string to a widget here.
        symbol = Select.create(
            options=securities, value=symbol, name='symbol', title=""
        )
        window_selector = Select.create(
            options=['---'], name='period_selector', title="Search intervals with:"
        )
        symbol_filter = Select.create(
            options=['All', 'Stocks with Spam', 'Stocks without Spam'],
            name='symbol_filter', title="Filter Symbols:",
            value='Stocks with Spam'
        )
        # Client-side JS callback (code lives in the callbacks module).
        callback = Callback(
            args={'symbol_filter': symbol_filter,
                  'dialog_loading': dialog_loading},
            code=callbacks.symbol_filter
        )
        symbol_filter.callback = callback


        btn_detect_pumps = Button(label='Configure P&D Detection', name='config_pumps')

        main_tab = Panel(title="Main")
        tabs = Tabs()

        # Create STOCKS TABLE
        ranks = utils.get_pumps_rank()
        # quotient_metrics = utils.get_quotient_metrics()
        # ranks['quotient'] = quotient_metrics['quotient']

        # NOTE(review): map() returns a lazy iterator on Python 3, and
        # DataFrame.sort() was removed in pandas 0.20 — this code appears to
        # target Python 2 with an old pandas; confirm before porting.
        foo = lambda x: utils.spams_count.get(x, 0)
        ranks['spams'] = map(foo, ranks['symbol'])
        ranks = ranks.sort(['spams', 'vol_quotient'], ascending=False)

        # Pre-computed column dicts for each symbol-filter choice, cached on
        # the class so filter callbacks can swap sources without recomputing.
        cls._pre_filtered_ranks = {
            'All': {k: ranks[k] for k in ranks.columns},
            'Stocks with Spam': dict(ranks[ranks['spams'] > 0].
                                     sort('vol_quotient', ascending=False)),
            'Stocks without Spam': dict(ranks[ranks['spams'] == 0].
                                        sort('vol_quotient', ascending=False)),
        }

        source_stocks_rank = ColumnDataSource(cls._pre_filtered_ranks['All'])


        table_stocks_rank = DataTable(
            source=source_stocks_rank, width=560, height=450,
            selectable=True, editable=True,
            columns=[
                TableColumn(field='symbol', title='symbol', width=130, editor=StringEditor()),
                TableColumn(field='vol_quotient', title='volume ratio', editor=StringEditor(),
                            default_sort='descending'),
                TableColumn(field='risk_score', title='risk', width=100, editor=StringEditor(),
                            default_sort='descending'),
                TableColumn(field='spams', title='spams', width=130, editor=StringEditor(),
                            default_sort='descending'),
            ])

        # 'callback' is deliberately reused; only the final binding matters.
        callback = Callback(args={'tr': table_stocks_rank, 'sr': source_stocks_rank, 'symb': symbol,
                                  'dialog_loading': dialog_loading},
                            code=callbacks.source_stocks_rank)
        source_stocks_rank.callback = callback

        # Expose every local by name — this dict IS the public contract.
        return locals()
コード例 #11
0
    def create_objects(cls, symbol, df, securities):
        """Build and wire up all dashboard widgets and data sources for
        *symbol*, returning them as a dict via ``locals()``.

        NOTE(review): because the return value is ``locals()``, every local
        variable name in this method is part of the external interface —
        callers look objects up by these names. Do not rename locals.

        Parameters: *symbol* is the security to display (rebound below to a
        Select widget), *df* is its price data (uses 'price' and 'dt'
        columns), *securities* is the list of symbols for the selector.
        """
        descr_box = Paragraph(text='content loading...')

        # Loading modal, hidden by default; closed through its button.
        btn_close_loading = Button(label='Close Loading')
        dialog_loading = Dialog(title='loading',
                                content=vplot(descr_box),
                                name='loading_dialog',
                                buttons=[btn_close_loading],
                                visible=False)

        source_data = dict(df)
        main_source = ColumnDataSource(dict(df))
        source = ColumnDataSource(source_data)

        # TODO: REMOVE THIS COMMENTED CODE! IT'S JUST THE PREVIOUS
        # VERSION USED BEFORE NEW P&D Cached results and algorithm
        # get the cached results of the P&D algorithm computed with the
        # "default" configuration
        # intervals = utils.cached_pumps.get(symbol, pumps.to_dicts(((),(),(),(),(),())))
        # intervals['bottom'] = [0] * len(intervals['start'])
        # intervals['values'] = [max(df['price'])] * len(intervals['start'])
        #
        # intervals = pd.DataFrame(intervals)

        # new version
        stats = utils.get_symbols_cached_stats()[symbol]
        intervals = pd.DataFrame(stats)
        # Rectangle extents for drawing each interval over the price plot.
        intervals['bottom'] = [0] * len(intervals['start'])
        intervals['values'] = [max(df['price'])] * len(intervals['start'])

        # Parse a configured date string and convert it to epoch seconds.
        conv = lambda x: utils.to_seconds(pd.to_datetime(x))

        # Keep only intervals whose start falls inside the configured range.
        intervals = intervals[
            (pd.to_datetime(intervals['start']) > conv(config.date_range[0])) &
            (pd.to_datetime(intervals['start']) < conv(config.date_range[1]))]

        # Create P&Ds intervals DataSource
        intervals_source = ColumnDataSource(intervals)
        source.tags = ['main_source']

        trends = utils.load_trends_data(symbol, start_date=min(df['dt']))
        trends_source = ColumnDataSource(trends)

        trades = Slider(title="trades",
                        name='trades',
                        value=0,
                        start=0,
                        end=124,
                        step=1)

        # Selectors
        # NOTE: 'symbol' is rebound from the input string to a widget here.
        symbol = Select.create(options=securities,
                               value=symbol,
                               name='symbol',
                               title="")
        window_selector = Select.create(options=['---'],
                                        name='period_selector',
                                        title="Search intervals with:")
        symbol_filter = Select.create(
            options=['All', 'Stocks with Spam', 'Stocks without Spam'],
            name='symbol_filter',
            title="Filter Symbols:",
            value='Stocks with Spam')
        # Client-side JS callback (code lives in the callbacks module).
        callback = Callback(args={
            'symbol_filter': symbol_filter,
            'dialog_loading': dialog_loading
        },
                            code=callbacks.symbol_filter)
        symbol_filter.callback = callback

        btn_detect_pumps = Button(label='Configure P&D Detection',
                                  name='config_pumps')

        main_tab = Panel(title="Main")
        tabs = Tabs()

        # Create STOCKS TABLE
        ranks = utils.get_pumps_rank()
        # quotient_metrics = utils.get_quotient_metrics()
        # ranks['quotient'] = quotient_metrics['quotient']

        # NOTE(review): map() returns a lazy iterator on Python 3, and
        # DataFrame.sort() was removed in pandas 0.20 — this code appears to
        # target Python 2 with an old pandas; confirm before porting.
        foo = lambda x: utils.spams_count.get(x, 0)
        ranks['spams'] = map(foo, ranks['symbol'])
        ranks = ranks.sort(['spams', 'vol_quotient'], ascending=False)

        # Pre-computed column dicts for each symbol-filter choice, cached on
        # the class so filter callbacks can swap sources without recomputing.
        cls._pre_filtered_ranks = {
            'All': {k: ranks[k]
                    for k in ranks.columns},
            'Stocks with Spam':
            dict(ranks[ranks['spams'] > 0].sort('vol_quotient',
                                                ascending=False)),
            'Stocks without Spam':
            dict(ranks[ranks['spams'] == 0].sort('vol_quotient',
                                                 ascending=False)),
        }

        source_stocks_rank = ColumnDataSource(cls._pre_filtered_ranks['All'])

        table_stocks_rank = DataTable(
            source=source_stocks_rank,
            width=560,
            height=450,
            selectable=True,
            editable=True,
            columns=[
                TableColumn(field='symbol',
                            title='symbol',
                            width=130,
                            editor=StringEditor()),
                TableColumn(field='vol_quotient',
                            title='volume ratio',
                            editor=StringEditor(),
                            default_sort='descending'),
                TableColumn(field='risk_score',
                            title='risk',
                            width=100,
                            editor=StringEditor(),
                            default_sort='descending'),
                TableColumn(field='spams',
                            title='spams',
                            width=130,
                            editor=StringEditor(),
                            default_sort='descending'),
            ])

        # 'callback' is deliberately reused; only the final binding matters.
        callback = Callback(args={
            'tr': table_stocks_rank,
            'sr': source_stocks_rank,
            'symb': symbol,
            'dialog_loading': dialog_loading
        },
                            code=callbacks.source_stocks_rank)
        source_stocks_rank.callback = callback

        # Expose every local by name — this dict IS the public contract.
        return locals()