Code Example #1
def do_dependency_parse(fil):
    # Dependency-parse one (possibly gzipped) JSON file of tweets with
    # TweeboParser, writing the output to the parallel /dep_parse/ directory.
    u = TwitterUser()
    u.populate_tweets_from_file(fil,do_tokenize=False)
    out_file_name = fil.replace(".json","").replace(".gz","").replace("/json/","/dep_parse/")
    print out_file_name

    if len(u.tweets) == 0:
        os.utime(out_file_name)
        return 'empty, success'

    data = dependency_parse_tweets(TWEEBOPARSER_LOC,u.tweets,out_file_name)
    return 'completed'
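
A minimal driver sketch, not from the original source: because do_dependency_parse takes a single file path and returns a status string, it can be fanned out over a directory of gzipped JSON files with multiprocessing. The data/json/ layout below is an assumption.

import glob
from multiprocessing import Pool

if __name__ == '__main__':
    # Hypothetical input layout; the paths must contain /json/ so the output
    # path substitution inside do_dependency_parse works.
    input_files = glob.glob("data/json/*.json.gz")
    pool = Pool()
    statuses = pool.map(do_dependency_parse, input_files)
    pool.close()
    pool.join()
    print(statuses.count('completed'))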
Code Example #2
def do_dependency_parse(fil):
    u = TwitterUser()
    u.populate_tweets_from_file(fil, do_tokenize=False)
    out_file_name = fil.replace(".json", "").replace(".gz", "").replace(
        "/json/", "/dep_parse/")
    print out_file_name

    if len(u.tweets) == 0:
        os.utime(out_file_name)
        return 'empty, success'

    data = dependency_parse_tweets(TWEEBOPARSER_LOC, u.tweets, out_file_name)
    return 'completed'
Code Example #3
def gen_dp(data):
    # data is an (output-file id, user-id list) tuple: load the gzipped JSON of
    # tweets stored under that id and dependency-parse it with TweeboParser,
    # skipping the work if the gzipped parse output already exists.
    of_id, uid_list = data
    json_of_name = os.path.join(JSON_OUTPUT_DIRECTORY, str(of_id) + ".json.gz")
    dp_of_name = os.path.join(DP_OUTPUT_DIRECTORY, str(of_id) + ".dp")

    reader = [
        z.decode("utf8") for z in gzip.open(json_of_name).read().splitlines()
    ]
    tweets_to_write = [Tweet(json.loads(l), do_tokenize=False) for l in reader]

    if not os.path.exists(dp_of_name + ".gz"):
        print 'DOING DP', dp_of_name
        try:
            dp = dependency_parse_tweets(TWEEBOPARSER_LOC, tweets_to_write,
                                         dp_of_name)
        except:
            print 'FAILED DP STUFF: ', dp_of_name
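
A usage sketch, and an assumption rather than code from the original: the single data tuple exists so the function can be handed to multiprocessing.Pool.map, which passes exactly one argument per task. The ids and user lists below are placeholders, and the sketch assumes JSON_OUTPUT_DIRECTORY, DP_OUTPUT_DIRECTORY, and the per-id json.gz files are already in place.

from multiprocessing import Pool

if __name__ == '__main__':
    # Placeholder work items: (output-file id, list of user ids).
    work_items = [(0, [101, 102]), (1, [103, 104])]
    pool = Pool()
    pool.map(gen_dp, work_items)
    pool.close()
    pool.join()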
Code Example #4
def gen_dp(json_input_filename, dp_output_filename):
    """
    This function generates a dependency parse file (ending in dp) that will be used to create
     features for the identity extractor model. This process takes by far the longest of any
     process in this file. It calls out to a shell script that runs tweeboparser.

    :param json_input_filename: A (possibly cleaned, possibly gzipped) JSON file
    :param dp_output_filename: An output filename for the dependency parse
    :return:
    """

    reader = [z.decode("utf8") for z in gzip.open(json_input_filename).read().splitlines()]
    tweets_to_write = [Tweet(json.loads(l),do_tokenize=False) for l in reader]

    if not os.path.exists(dp_output_filename+".gz"):
        print 'DOING DP', dp_output_filename
        try:
            dp = dependency_parse_tweets(TWEEBOPARSER_LOC,tweets_to_write, dp_output_filename)
        except:
            print 'FAILED DP STUFF: ', dp_output_filename
Code Example #5
def gen_dp(json_input_filename, dp_output_filename):
    """
    This function generates a dependency parse file (ending in dp) that will be used to create
     features for the identity extractor model. This process takes by far the longest of any
     process in this file. It calls out to a shell script that runs tweeboparser.

    :param json_input_filename: A (possibly cleaned, possibly gzipped) JSON file
    :param dp_output_filename: An output filename for the dependency parse
    :return:
    """

    reader = [
        z.decode("utf8")
        for z in gzip.open(json_input_filename).read().splitlines()
    ]
    tweets_to_write = [Tweet(json.loads(l), do_tokenize=False) for l in reader]

    if not os.path.exists(dp_output_filename + ".gz"):
        print 'DOING DP', dp_output_filename
        try:
            dp = dependency_parse_tweets(TWEEBOPARSER_LOC, tweets_to_write,
                                         dp_output_filename)
        except:
            print 'FAILED DP STUFF: ', dp_output_filename
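
A direct-call sketch under assumed paths (both filenames below are hypothetical, not from the original project). Because of the os.path.exists check, re-running is cheap: files whose gzipped parse output already exists are skipped.

# Hypothetical filenames; the real paths come from the surrounding pipeline.
gen_dp("data/json/some_user.json.gz", "data/dep_parse/some_user.dp")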
__author__ = 'kjoseph'

from utility_code.util import *
from twitter_dm import dependency_parse_tweets
import codecs

tweet_id_to_tweet = get_original_tweet_data()

all_tweets = [v.tweet for v in tweet_id_to_tweet.values()]

parse_data = dependency_parse_tweets(
    TWEEBOPARSER_LOCATION,
    all_tweets,
    'processed_data/dependency_parsed_tweets.txt',
    gzip_final_output=False)[:-1]

write_dep_parse_with_tweet_ids_file = codecs.open("dep_parse_w_ids.txt", "w",
                                                  "utf8")

for i, parse in enumerate(parse_data):
    write_dep_parse_with_tweet_ids_file.write(str(all_tweets[i].id) + "\n")
    write_dep_parse_with_tweet_ids_file.write(parse)
    write_dep_parse_with_tweet_ids_file.write("\n\n")

write_dep_parse_with_tweet_ids_file.close()
__author__ = 'kjoseph'

from utility_code.util import *
from twitter_dm import dependency_parse_tweets
import codecs


tweet_id_to_tweet = get_original_tweet_data()

all_tweets = [v.tweet for v in tweet_id_to_tweet.values()]

parse_data = dependency_parse_tweets(TWEEBOPARSER_LOCATION,
                                     all_tweets,
                                     'processed_data/dependency_parsed_tweets.txt',
                                     gzip_final_output=False)[:-1]

write_dep_parse_with_tweet_ids_file = codecs.open("dep_parse_w_ids.txt","w","utf8")

for i, parse in enumerate(parse_data):
    write_dep_parse_with_tweet_ids_file.write(str(all_tweets[i].id) + "\n")
    write_dep_parse_with_tweet_ids_file.write(parse)
    write_dep_parse_with_tweet_ids_file.write("\n\n")

write_dep_parse_with_tweet_ids_file.close()
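
A reading sketch for the file written above, an assumption about downstream use rather than code from the original: each record is a tweet-id line followed by the parse text, with records separated by a blank line, so the pairs can be recovered as below (assuming a single tweet's parse contains no internal blank lines).

import codecs

def read_dep_parse_with_ids(filename):
    # Yield (tweet_id, parse_text) pairs from dep_parse_w_ids.txt.
    with codecs.open(filename, "r", "utf8") as f:
        block = []
        for line in f:
            if line.strip():
                block.append(line.rstrip("\n"))
            elif block:
                yield block[0], "\n".join(block[1:])
                block = []
        if block:  # flush the final record if the file lacks a trailing blank line
            yield block[0], "\n".join(block[1:])

for tweet_id, parse in read_dep_parse_with_ids("dep_parse_w_ids.txt"):
    pass  # e.g. align each parse back to its original tweet by id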