sys.path.insert(0, grand_dir)
from Munge.loadCleanly import sheets as sh

# Pull the raw "text" field out of every line of <fileroot>.json and write the
# results to <fileroot>.csv as two-column rows: [tweet_text, 0].
# NOTE(review): the input is presumably one JSON tweet record per line, with
# the "text" key appearing before the "source" key -- confirm against the data.

# Switch data sets by swapping which assignment is active.
# fileroot = "SwiftMinaj"
fileroot = "KanyeTrump"

data_dir = os.path.realpath(
    os.path.abspath(os.path.join(this_dir, "../../../data_sets/")))
swfile = os.path.realpath(
    os.path.abspath(os.path.join(data_dir, fileroot + ".json")))
swcsv = os.path.realpath(
    os.path.abspath(os.path.join(data_dir, fileroot + ".csv")))

# Markers that bracket the tweet text inside a raw line.
search_text = re.compile('"text":')
search_source = re.compile('"source":')

rows = []
with open(swfile) as f:
    # Iterate the file lazily instead of loading every line with readlines().
    for tweet_line in f:
        txtpos = search_text.search(tweet_line)
        srcpos = search_source.search(tweet_line)
        if txtpos is None or srcpos is None:
            # search() returns None when a marker is absent (e.g. a blank
            # line); skip the line rather than crash on None.end().
            continue
        # Slice from one character past the '"text":' marker up to two
        # characters before the '"source":' marker; the offsets trim the
        # delimiters that surround the value in the raw line.
        tweet_text = tweet_line[(txtpos.end() + 1):(srcpos.start() - 2)]
        rows.append([tweet_text, 0])

# Empty header list; dedupe=True is handed to the project's CSV helper.
sh.write_csv(swcsv, [], rows, dedupe=True)
#!/usr/bin/env python
# Run a fixed Twitter search and save the text of every returned tweet to
# Trump4pres.csv (one single-column row per tweet, header "text").
import csv
import inspect
import os
import sys

# Directory holding this script, then its parent and grandparent directories.
this_dir = os.path.realpath(
    os.path.abspath(
        os.path.dirname(inspect.getfile(inspect.currentframe()))))
parent_dir = os.path.realpath(os.path.abspath(os.path.join(this_dir, "../")))
grand_dir = os.path.realpath(os.path.abspath(os.path.join(this_dir, "../../")))

# Put all three at the front of the import path. The resulting order
# (grand_dir, parent_dir, this_dir) is the same as three successive
# insert(0, ...) calls in this_dir, parent_dir, grand_dir order.
sys.path[:0] = [grand_dir, parent_dir, this_dir]

from twitter_api import *
from Munge.loadCleanly import sheets as sh
from Munge.loadCleanly.sheets import *

# Earlier, broader query kept for reference:
# query = "president trump OR donald -jorge -ramos -vice -not -kanye -west -dog -bully -obama -impeached -dumb -racist -idiot -mex -purge -tele -outlaw"
query = "#trump4pres OR #trump4president OR #trump2016 lang:en"

tweets = GetTwitterQuery(query, 10000)

# One single-element row per tweet so each text lands in its own CSV row.
tweet_text = [[tweet.text] for tweet in tweets]

filename = 'Trump4pres.csv'
sh.write_csv(filename, ['text'], tweet_text, dedupe=True)
#!/usr/bin/env python
# Query Twitter for a fixed set of campaign hashtags and dump the tweet texts
# into Trump4pres.csv under a single "text" column.
import csv
import inspect
import os
import sys

# Resolve the script's own directory and the two directories above it.
this_dir = os.path.dirname(inspect.getfile(inspect.currentframe()))
this_dir = os.path.realpath(os.path.abspath(this_dir))
parent_dir = os.path.join(this_dir, "../")
parent_dir = os.path.realpath(os.path.abspath(parent_dir))
grand_dir = os.path.join(this_dir, "../../")
grand_dir = os.path.realpath(os.path.abspath(grand_dir))

# Prepend all three to the module search path; grand_dir ends up first,
# followed by parent_dir and this_dir.
sys.path[0:0] = [grand_dir, parent_dir, this_dir]

from twitter_api import *
from Munge.loadCleanly import sheets as sh
from Munge.loadCleanly.sheets import *

# Alternative query retained from an earlier run:
# query = "president trump OR donald -jorge -ramos -vice -not -kanye -west -dog -bully -obama -impeached -dumb -racist -idiot -mex -purge -tele -outlaw"
query = "#trump4pres OR #trump4president OR #trump2016 lang:en"
tweets = GetTwitterQuery(query, 10000)

# Wrap each tweet's text in its own list so it forms a one-column row.
tweet_text = [[status.text] for status in tweets]

filename = "Trump4pres.csv"
sh.write_csv(filename, ["text"], tweet_text, dedupe=True)
# Pull the raw "text" field out of every line of <fileroot>.json and write the
# results to <fileroot>.csv as two-column rows: [tweet_text, 0].
# NOTE(review): the input is presumably one JSON tweet record per line, with
# the "text" key appearing before the "source" key -- confirm against the data.

# Switch data sets by swapping which assignment is active.
# fileroot = "SwiftMinaj"
fileroot = "KanyeTrump"

data_dir = os.path.realpath(
    os.path.abspath(os.path.join(this_dir, "../../../data_sets/")))
swfile = os.path.realpath(
    os.path.abspath(os.path.join(data_dir, fileroot + ".json")))
swcsv = os.path.realpath(
    os.path.abspath(os.path.join(data_dir, fileroot + ".csv")))

# Markers that bracket the tweet text inside a raw line.
search_text = re.compile('"text":')
search_source = re.compile('"source":')

rows = []
with open(swfile) as f:
    # Iterate the file lazily instead of loading every line with readlines().
    for tweet_line in f:
        txtpos = search_text.search(tweet_line)
        srcpos = search_source.search(tweet_line)
        if txtpos is None or srcpos is None:
            # search() returns None when a marker is absent (e.g. a blank
            # line); skip the line rather than crash on None.end().
            continue
        # Slice from one character past the '"text":' marker up to two
        # characters before the '"source":' marker; the offsets trim the
        # delimiters that surround the value in the raw line.
        tweet_text = tweet_line[(txtpos.end() + 1):(srcpos.start() - 2)]
        rows.append([tweet_text, 0])

# Empty header list; dedupe=True is handed to the project's CSV helper.
sh.write_csv(swcsv, [], rows, dedupe=True)