Python Operations.append_tokensの例

プログラミング言語: Python

名前空間/パッケージ名: operations

クラス/型: Operations

メソッド/関数: append_tokens

hotexamples.comのコード掲載数: 2

Python Operations.append_tokens - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのoperations.Operations.append_tokensの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

Operations(20)

add(4)

clean_string_column(3)

accounts(3)

addition(3)

basic_function(2)

doConvolution(2)

create_dataframe(2)

__init__(2)

append_tokens(2)

SUBS(1)

absolute(1)

WRITE(1)

TYPE(1)

PUSHS(1)

SUB(1)

STRLEN(1)

PointsPosition(1)

STRI2INT(1)

SETCHAR(1)

RETURN(1)

READ(1)

STRI2INTS(1)

ADD(1)

aggregate_times(1)

binary_operation(1)

make_plot_csv(1)

fromCode(1)

computePower(1)

computeFibonacci(1)

bootVolumeDelete(1)

bootVolumeAttach(1)

avg(1)

and_op(1)

average_price_thirty_percent(1)

average_listing_selling_price(1)

authenticate(1)

append_tf_idf(1)

append_ranks(1)

PUSHFRAME(1)

append_date_columns(1)

POPFRAME(1)

POPS(1)

DPRINT(1)

GTS(1)

GT(1)

GETCHAR(1)

FLOAT2INT(1)

EXIT(1)

EQS(1)

コード例 #1

ファイルを表示

ファイル: test_operations.py プロジェクト: ari99/wiki_stats

def test_append_tokens(df):
    """
    Check that append_tokens yields the expected token list for one row.

    The 'pagename' column is cleaned first and the result is tokenized.
    The row whose 'pageviews' value equals 60 is then looked up and its
    'tokens' value is compared against the expected list.

    Args:
        :param df: DataFrame object from fixture.
    """
    operations = Operations()
    cleaned = operations.clean_string_column(df, 'pagename')
    tokenized = operations.append_tokens(cleaned)

    # The stop word 'first' is removed, so it does not appear below.
    # For a list of stop words see:
    # http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words
    expected_tokens = ['john', 'person', '100', 'day']

    matching_row = (
        tokenized
        .where(tokenized['pageviews'] == 60)
        .select("tokens")
        .first()
    )
    assert matching_row['tokens'] == expected_tokens

コード例 #2

ファイルを表示

ファイル: wiki_stats.py プロジェクト: ari99/wiki_stats


from operations import Operations

# Driver pipeline: build a DataFrame from `lines`, derive per-timeframe
# aggregates, rank them, and hand the top-ranked rows to make_plot_csv.
# `lines` and `sqlContext` are defined outside this excerpt.
ops = Operations()
# Create the dataframe from the lines RDD.
df = ops.create_dataframe(lines, sqlContext)
# Clean the 'pagename' column of encoded characters.
df = ops.clean_string_column(df, 'pagename')
# Add columns for hour, day, month, year from the file name.
df = ops.append_date_columns(df)

# Group by timeframes; aggregate_times returns one DataFrame per timeframe.
hour_df, day_df, month_df, year_df = ops.aggregate_times(df)
# Create tokens from the pagename.
# Only the hourly DataFrame is tokenized here; the others are left as-is.
hour_df = ops.append_tokens(hour_df)
# Add term frequency and inverse document frequency (hourly DataFrame only).
hour_df = ops.append_tf_idf(hour_df)
# Create ranking for all four timeframes.
hour_df, day_df, month_df, year_df = ops.append_ranks(hour_df, day_df, month_df, year_df)

# Keep rows whose rank is below 201 in each timeframe
# (the top 200, assuming ranks start at 1 -- TODO confirm in append_ranks).
top_hourly = hour_df.filter(hour_df['hour_rank']<201)
top_daily =  day_df.filter(day_df['day_rank']<201)
top_monthly =  month_df.filter(month_df['month_rank']<201)
top_yearly =  year_df.filter(year_df['year_rank']<201)

# Write the results out via make_plot_csv, one call per timeframe label.
# The original author notes these files are created on S3; the destination
# is not visible in this excerpt.
# NOTE(review): top_yearly is computed above but not written in the visible
# portion of this script -- confirm whether a "yearly" call follows.
ops.make_plot_csv(top_hourly,"hourly")
ops.make_plot_csv(top_daily,"daily")
ops.make_plot_csv(top_monthly,"monthly")