import json
import os
import re
import string
import traceback

import psycopg2
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer


def get_data(self, locationid):
    # Maps each of the top 15 trends for a location to the list of
    # entities (hashtags) found in its tweets.
    entity_trend_dict = {}
    try:
        conn = PostgresConnector().get_connection()
        cursor = conn.cursor()
        query = """select t1.entity, t2.trend from
                   (select id, entity from id_entity) as t1
                   inner join
                   (select id, trend from organized_tweets
                    where trend in
                      (select trend from
                        (select count(*) as c, trend from trends
                         where locationid = %s group by trend) as t_in
                       order by c desc limit 15)) as t2
                   on t1.id = t2.id"""
        cursor.execute(query, (locationid,))
        entity_column = 0
        trend_column = 1
        for row in cursor:
            trend = row[trend_column]
            if trend in entity_trend_dict:
                entity_trend_dict[trend].append(row[entity_column])
            else:
                entity_trend_dict[trend] = [row[entity_column]]
    except Exception:
        print traceback.format_exc()
    return entity_trend_dict
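
# Every method in this file goes through a project-local PostgresConnector
# helper that is not shown here. A minimal sketch of what it is assumed to
# look like, built on psycopg2; the connection parameters are placeholders:
class PostgresConnector(object):
    def __init__(self):
        # Assumed settings; substitute the real host, database and user.
        self.connection_string = 'host=localhost dbname=tweets user=postgres'

    def get_connection(self):
        # psycopg2.connect accepts a libpq-style connection string.
        return psycopg2.connect(self.connection_string)
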
def build(self):
    # Flattens each tweet's hashtag entities into a tab-separated file,
    # then bulk-loads it into id_entity with COPY.
    conn = PostgresConnector().get_connection()
    cursor = conn.cursor()
    query = 'select id,entities,trend from organized_tweets'
    cursor.execute(query)
    id_column = 0
    entities_column = 1
    trend_column = 2
    with open('copy_from.txt', 'w') as f:
        for row in cursor:
            tweet_id = row[id_column]
            trend = row[trend_column]
            hashtag_array = row[entities_column]
            json_array = json.loads(hashtag_array)
            hashtag_list = [hashtag["text"] for hashtag in json_array]
            hashtag_list_unique = list(set(hashtag_list))
            print 'Writing data to table for the tweet_id ' + tweet_id
            for hashtag in hashtag_list_unique:
                f.write(tweet_id + '\t' + hashtag.encode('utf-8') + '\t' + trend + '\n')
    with open('copy_from.txt') as f:
        cursor.copy_from(f, 'id_entity', columns=('id', 'entity', 'trend'))
    conn.commit()
    os.remove('copy_from.txt')
def get_trends(self, location_id, start_date, end_date):
    trends_list = []
    try:
        conn = PostgresConnector().get_connection()
        cursor = conn.cursor()
        query = """ select c,trend from
                    (select count(*) as c,trend from trends
                     where locationid = %s and date between %s and %s
                     and id in (select trendid from tweets)
                     group by trend) as t1
                    order by c desc limit 15 """
        cursor.execute(query, (location_id, start_date, end_date))
        trend_column = 1
        count_column = 0
        for row in cursor:
            trend_count = {}
            trend_count["trend"] = row[trend_column]
            trend_count["count"] = row[count_column]
            trends_list.append(trend_count)
    except Exception as e:
        print e
    return trends_list
def get_dates_location(self, locationid):
    min_max_date_list = []
    try:
        conn = PostgresConnector().get_connection()
        cursor = conn.cursor()
        query = """ select max(date),min(date) from trends
                    where trend in
                      (select t1.trend as trend from
                        (select count(*) as c,trend from trends
                         where locationid = %s group by trend) as t1
                       order by c desc limit 15)
                    and locationid = %s """
        cursor.execute(query, (locationid, locationid))
        min_date_column = 1
        max_date_column = 0
        for row in cursor:
            min_max_date_dict = {}
            min_max_date_dict["min_date"] = str(row[min_date_column])
            min_max_date_dict["max_date"] = str(row[max_date_column])
            min_max_date_list.append(min_max_date_dict)
    except Exception:
        # format_exc() alone discards the traceback; print it
        print traceback.format_exc()
    return min_max_date_list
def get_total_documents(self):
    conn = PostgresConnector().get_connection()
    cursor = conn.cursor()
    query = 'select count(distinct(id)) from "IdEntity" '
    cursor.execute(query)
    count_of_distinct_id_column = 0
    total_documents_count = 0
    for row in cursor:
        total_documents_count = row[count_of_distinct_id_column]
    return total_documents_count
def get_sentiments(self):
    conn = PostgresConnector().get_connection()
    cursor = conn.cursor()
    query = """ select text from organized_tweets limit 10 """
    cursor.execute(query)
    for row in cursor:
        text = row[0]
        blob = TextBlob(text, analyzer=NaiveBayesAnalyzer())
        print blob.sentiment
def build_tf(self):
    # using group by first get tf score for each entity
    conn = PostgresConnector().get_connection()
    cursor = conn.cursor()
    query = 'select count(id),entity from "IdEntity" group by entity'
    cursor.execute(query)
    count_of_id_column = 0
    entities_column = 1
    entity_id_dict = {}
    for row in cursor:
        count_of_id = row[count_of_id_column]
        entity = row[entities_column]
        entity_id_dict[entity] = count_of_id
    return entity_id_dict
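
# build_tf returns each entity's document frequency (the number of tweets it
# appears in). A sketch of how it could be combined with get_total_documents
# above to weight entities; the helper name score_entities and the idea of
# calling both methods on one indexer object are illustrative assumptions:
import math

def score_entities(indexer):
    total_documents = indexer.get_total_documents()
    entity_counts = indexer.build_tf()
    scores = {}
    for entity, doc_count in entity_counts.items():
        # idf = log(N / df); doc_count is at least 1 for indexed entities.
        scores[entity] = doc_count * math.log(float(total_documents) / doc_count)
    return scores
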
def get_locations(self):
    conn = PostgresConnector().get_connection()
    cursor = conn.cursor()
    query = 'SELECT id,city from location'
    cursor.execute(query)
    id_column = 0
    city_column = 1
    locations_list = []
    for row in cursor:
        id_location = {}
        id_location["geoid"] = row[id_column]
        id_location["city"] = row[city_column]
        locations_list.append(id_location)
    return locations_list
def get_tfidf(self, locationid, trend):
    tfidf_list = []
    try:
        conn = PostgresConnector().get_connection()
        cursor = conn.cursor()
        # tf = 1 / (entities in the tweet); idf = log of (tweets in the
        # trend / tweets containing the entity). The ::numeric cast avoids
        # integer division inside log().
        tfidf_query = """
            select entity, tf_idf_score from
              (select t4.entity, sum(t4.tf_idf) as tf_idf_score from
                (select t1.id, t1.entity, t2.count_id, t3.count_entity,
                        (1.0/t3.count_entity) *
                        log((select count(*)::numeric from organized_tweets
                             where trend = %s and location_id = %s) / t2.count_id) as tf_idf
                 from
                   (select id, entity from id_entity
                    where id in (select id from organized_tweets
                                 where trend = %s and location_id = %s)) as t1
                 inner join
                   (select entity, count(id) as count_id from id_entity
                    where id in (select id from organized_tweets
                                 where trend = %s and location_id = %s)
                    group by entity) as t2
                   on t1.entity = t2.entity
                 inner join
                   (select id, count(entity) as count_entity from id_entity
                    where id in (select id from organized_tweets
                                 where trend = %s and location_id = %s)
                    group by id) as t3
                   on t1.id = t3.id) as t4
               group by entity) as t5
            order by tf_idf_score desc limit 100;
        """
        cursor.execute(tfidf_query, (trend, locationid, trend, locationid,
                                     trend, locationid, trend, locationid))
        entity_column = 0
        tfidf_column = 1
        for row in cursor:
            entity_tfidf_score = {}
            entity_tfidf_score["entity"] = row[entity_column]
            entity_tfidf_score["tfidf"] = row[tfidf_column]
            tfidf_list.append(entity_tfidf_score)
        return tfidf_list
    except Exception:
        print traceback.format_exc()
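
# For reference, the weight computed inside get_tfidf's SQL for an entity e
# appearing in tweet d of the selected trend is
#   tf_idf(e, d) = (1 / |entities in d|) * log10(|tweets in trend| / |tweets with e|)
# and an entity's score is the sum of tf_idf(e, d) over all its tweets.
# A pure-Python restatement of the per-tweet weight (names are illustrative;
# log10 matches Postgres's base-10 log):
import math

def tf_idf_weight(entities_in_tweet, tweets_in_trend, tweets_with_entity):
    return (1.0 / entities_in_tweet) * math.log10(float(tweets_in_trend) / tweets_with_entity)
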
def get_tweets(self, trend, entity):
    # Returns up to 50 tweet texts mentioning the entity (the trend
    # argument is currently unused).
    conn = PostgresConnector().get_connection()
    cursor = conn.cursor()
    query_tweets = """ select text from organized_tweets where id in
                       (select id from id_entity where entity = %s) limit 50 """
    cursor.execute(query_tweets, (entity,))
    text_list = []
    for row in cursor:
        text_dict = {}
        text_dict["name"] = row[0]
        text_list.append(text_dict)
    return text_list
def get_sentiments(self):
    conn = PostgresConnector().get_connection()
    cursor = conn.cursor()
    query = """ select id,text from organized_tweets """
    cursor.execute(query)
    id_column = 0
    text_column = 1
    with open("sentiments.tsv", "w") as f:
        for row in cursor:
            text = row[text_column]
            blob = TextBlob(text, analyzer=NaiveBayesAnalyzer())
            print 'writing for tweet with id ' + str(row[id_column])
            f.write(str(row[id_column]) + '\t' +
                    str(blob.sentiment.classification) + '\t' +
                    str(blob.sentiment.p_pos) + '\t' +
                    str(blob.sentiment.p_neg) + '\n')
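
# TextBlob's NaiveBayesAnalyzer trains on NLTK's movie_reviews corpus the
# first time it is used, so the corpora must be available locally. A one-off
# setup sketch, assuming textblob and nltk are installed:
#   python -m textblob.download_corpora
# or, equivalently, from Python:
#   import nltk
#   nltk.download('movie_reviews')
#   nltk.download('punkt')
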
def validate_data(self, vehicle_type, toll_type, date, price, vehicle_no):
    if (vehicle_type.strip() == '' or toll_type.strip() == '' or
            price.strip() == '' or date.strip() == '' or vehicle_no.strip() == ''):
        # figure out what to return here!
        raise Exception('input data has nulls')
    else:
        try:
            conn = PostgresConnector().get_connection()
            cursor = conn.cursor()
            # Parameterized so the driver handles quoting instead of
            # interpolating values into the SQL string.
            query = """ INSERT INTO transactions
                        (vehicle_type, toll_type, timestamp, price, vehicle_no)
                        values (%s, %s, %s, %s, %s) """
            cursor.execute(query, (vehicle_type, toll_type, date, float(price), vehicle_no))
            conn.commit()
            return 'Success'
        except psycopg2.IntegrityError:
            raise Exception('Unique key constraint failed')
        except Exception as e:
            print e
            raise Exception('Something else went wrong')
def validate_data(self, vehicle_no, time):
    if vehicle_no.strip() == '' and time.strip() == '':
        # figure out what to return here!
        raise Exception('input data has nulls')
    else:
        try:
            conn = PostgresConnector().get_connection()
            cursor = conn.cursor()
            # Parameterized queries replace the earlier string interpolation.
            if vehicle_no.strip() != '' and time.strip() != '':
                query = """ SELECT * FROM transactions
                            where vehicle_no = %s and timestamp = %s
                            order by timestamp """
                params = (vehicle_no, time)
            elif vehicle_no.strip() != '':
                query = """ select t2.vehicle_type, t2.toll_type, t1.timestamp,
                                   t1.price, t1.vehicle_no from
                            (SELECT * FROM transactions where vehicle_no = %s) as t1
                            inner join (select * from master_data) as t2
                            on t2.vehicle_type_id = t1.vehicle_type
                            and t2.toll_type_id = t1.toll_type """
                params = (vehicle_no,)
            else:
                query = """ SELECT * FROM transactions where timestamp = %s
                            order by timestamp """
                params = (time,)
            print "selecting data from the table using the query %s" % (query,)
            cursor.execute(query, params)
            search_list = []
            for row in cursor:
                elements = {}
                elements['vehicle_type'] = row[0]
                elements['toll_type'] = row[1]
                elements['time'] = str(row[2])
                elements['price'] = row[3]
                elements['vehicle_no'] = row[4]
                search_list.append(elements)
            return search_list
        except Exception:
            print traceback.format_exc()
def build(self):
    conn = PostgresConnector().get_connection()
    cursor = conn.cursor()
    query = 'select id,hashtags from "organizedTweets" '
    cursor.execute(query)
    id_column = 0
    entities_column = 1
    entity_id_dict = {}
    for row in cursor:
        tweet_id = row[id_column]
        hashtag_array = row[entities_column]
        hashtag_list = [hashtag['text'] for hashtag in hashtag_array]
        for entity in hashtag_list:
            if entity in entity_id_dict:
                entity_id_dict[entity].append(tweet_id)
            else:
                entity_id_dict[entity] = [tweet_id]
    return entity_id_dict
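
# The entity -> [tweet ids] index returned by build above makes document
# frequencies trivial to derive; a small usage sketch (the function name is
# illustrative):
def document_frequencies(index):
    # index is the dict returned by build(); the df of an entity is the
    # number of distinct tweets it occurs in.
    return dict((entity, len(ids)) for entity, ids in index.items())
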
def get_data(self):
    out_list = []
    try:
        conn = PostgresConnector().get_connection()
        cursor = conn.cursor()
        query = """ SELECT vehicle_type, vehicle_type_id, toll_type,
                    toll_type_id, price from master_data """
        cursor.execute(query)
        resultset = cursor.fetchall()
        vehicle_dict = {}
        for row in resultset:
            id = row[1]
            vehicle_name = row[0]
            journey_list = []
            if id not in vehicle_dict:
                vehicle_dict_out = {}
                vehicle_dict[id] = 1
                # collect every toll type priced for this vehicle type
                for row_in in resultset:
                    if row_in[1] == id:
                        journey_type = {}
                        journey_type['toll_type'] = row_in[2]
                        journey_type['toll_id'] = str(row_in[3])
                        journey_type['price'] = str(row_in[4])
                        journey_list.append(journey_type)
                vehicle_dict_out['vehicle_type'] = vehicle_name
                vehicle_dict_out['vehicle_type_id'] = id
                vehicle_dict_out['journey'] = journey_list
                out_list.append(vehicle_dict_out)
        return out_list
    except Exception as e:
        print e
        raise Exception('Something went wrong while retrieving data')
def get_trends(self, location_id, start_date, end_date):
    trends_list = []
    try:
        conn = PostgresConnector().get_connection()
        cursor = conn.cursor()
        query = """ select c,trend from
                    (select count(*) as c,trend from trends
                     where locationid = %s and date between %s and %s
                     group by trend) as t1
                    order by c desc limit 15 """
        cursor.execute(query, (location_id, start_date, end_date))
        trend_column = 1
        count_column = 0
        for row in cursor:
            trend_count = {}
            trend_count["trend"] = row[trend_column]
            trend_count["count"] = row[count_column]
            trends_list.append(trend_count)
    except Exception as e:
        print e
    return trends_list
def update_organized_tweets(self):
    tweet_id_dict = {}
    try:
        conn = PostgresConnector().get_connection()
        cursor = conn.cursor()
        query_location = 'select id from location'
        cursor.execute(query_location)
        location_column = 0
        for row_location in cursor:
            # a separate cursor: re-using `cursor` here would clobber the
            # location result set still being iterated
            cursor_trends = conn.cursor()
            query = """ select id,trend from trends where trend in
                        (select trend from
                          (select count(*) as c,trend from trends
                           where locationid = %s group by trend) as t1
                         order by c desc limit 80) """
            location_id = row_location[location_column]
            cursor_trends.execute(query, (location_id,))
            trend_id_column = 0
            trend_name_column = 1
            trend_count = 0
            for row in cursor_trends:
                trend_count = trend_count + 1
                trend_id = row[trend_id_column]
                trend_name = row[trend_name_column]
                print 'Processing for trend ' + trend_id + ' , ' + str(trend_count)
                query_tweets = 'select tweets from tweets where trendId = %s'
                cursor_tweets = conn.cursor()
                cursor_tweets.execute(query_tweets, (str(trend_id),))
                tweets_column = 0
                with open(trend_name + '.txt', 'w') as f:
                    # rows of tweets array
                    for tweets_row in cursor_tweets:
                        tweets_json_array = tweets_row[tweets_column]
                        # tweets in a tweets array
                        for json_in in tweets_json_array:
                            id = json_in['id']
                            if tweet_id_dict.get(id) is None:
                                tweet_id_dict[id] = 1
                                geo = 'none' if json_in['geo'] is None else 'none'  # json['geo']
                                retweeted = json_in['retweeted']
                                in_reply_to_screen_name = 'none' if json_in['in_reply_to_screen_name'] is None else json_in['in_reply_to_screen_name']
                                truncated = 'none' if json_in['truncated'] is None else json_in['truncated']
                                source = json_in['source']
                                created_at = json_in['created_at']
                                place = 'none' if json_in['place'] is None else 'none'  # json['place']
                                user_id = json_in['user']['id']
                                text = json_in['text'].strip()
                                text = str(filter(lambda x: x in string.printable, text))
                                text = re.sub(r'\s+', ' ', text)
                                text = text.replace('\\', '')
                                entities = json_in['entities']['hashtags']
                                # user mentions are deliberately dropped
                                user_mentions = []
                                retweet_count = json_in['retweet_count']
                                favorite_count = json_in['favorite_count']
                                f.write(str(id) + '\t' + str(geo) + '\t' + str(retweeted) + '\t' +
                                        str(in_reply_to_screen_name.encode('utf-8')) + '\t' +
                                        str(truncated) + '\t' + str(source.encode('utf-8')) + '\t' +
                                        str(created_at.encode('utf-8')) + '\t' + str(place) + '\t' +
                                        str(user_id) + '\t' + text + '\t' +
                                        str(json.dumps(entities)) + '\t' + str(user_mentions) + '\t' +
                                        str(retweet_count) + '\t' + str(favorite_count) + '\t' +
                                        str(trend_name) + '\t' + str(location_id) + '\n')
                print 'Writing to table'
                with open(trend_name + '.txt') as f:
                    cursor_write = conn.cursor()
                    cursor_write.copy_from(
                        f, 'organized_tweets',
                        columns=('id', 'geo', 'retweeted', 'in_reply_to_screen_name',
                                 'truncated', 'source', 'created_at', 'place', 'user_id',
                                 'text', 'entities', 'user_mentions', 'retweet_count',
                                 'favorite_count', 'trend', 'location_id'))
                    conn.commit()
                os.remove(trend_name + '.txt')
    except Exception:
        print traceback.format_exc()
def get_matrix(self, locationid):
    # Builds a symmetric 15x15 cosine-distance matrix between the top 15
    # trends of a location, over their entity-count vectors.
    try:
        conn = PostgresConnector().get_connection()
        cursor = conn.cursor()
        query = """
        WITH TREND_COUNT_TT AS
          (SELECT TREND, COUNT(*) AS TREND_COUNT FROM TRENDS
           WHERE LOCATIONID = %s GROUP BY TREND),
        TOP_TRENDS_TT AS
          (SELECT TREND FROM TREND_COUNT_TT ORDER BY TREND_COUNT DESC LIMIT 15),
        IDS_FOR_TOP_TRENDS_TT AS
          (SELECT ID FROM ORGANIZED_TWEETS
           WHERE TREND IN (SELECT TREND FROM TOP_TRENDS_TT)
           AND LOCATION_ID = %s),
        ID_ENTITY_TOP_TRENDS_TT AS
          (SELECT TREND, ID, ENTITY FROM ID_ENTITY
           WHERE ID IN (SELECT ID FROM IDS_FOR_TOP_TRENDS_TT)),
        TREND_ENTITY_TF_IDF_SUM_TT AS
          (SELECT TREND, ENTITY, COUNT(ID) TF_IDF_SUM FROM ID_ENTITY
           WHERE ID IN (SELECT ID FROM IDS_FOR_TOP_TRENDS_TT)
           GROUP BY TREND, ENTITY),
        TREND_TF_IDF_SQ_SUM_TT AS
          (SELECT TREND, SUM(TF_IDF_SUM*TF_IDF_SUM) AS TF_IDF_SQ_SUM
           FROM TREND_ENTITY_TF_IDF_SUM_TT GROUP BY TREND),
        COSINE_DIST_NUM_TT AS
          (SELECT T1.TREND AS TREND1, T2.TREND AS TREND2,
                  SUM(T1.TF_IDF_SUM*T2.TF_IDF_SUM) AS COSINE_NUM
           FROM TREND_ENTITY_TF_IDF_SUM_TT AS T1
           INNER JOIN TREND_ENTITY_TF_IDF_SUM_TT AS T2
             ON T2.TREND > T1.TREND AND T1.ENTITY = T2.ENTITY
           GROUP BY T1.TREND, T2.TREND),
        COSINE_DIST_TT AS
          (SELECT TREND1, TREND2,
                  COSINE_NUM/(SQRT(T2.TF_IDF_SQ_SUM)*SQRT(T3.TF_IDF_SQ_SUM)) AS COSINE_DIST
           FROM COSINE_DIST_NUM_TT AS T1
           INNER JOIN TREND_TF_IDF_SQ_SUM_TT AS T2 ON T1.TREND1 = T2.TREND
           INNER JOIN TREND_TF_IDF_SQ_SUM_TT AS T3 ON T1.TREND2 = T3.TREND)
        SELECT * FROM COSINE_DIST_TT ORDER BY TREND1, TREND2;
        """
        cursor.execute(query, (locationid, locationid))
        trend1_column = 0
        trend2_column = 1
        distance_value_column = 2
        trends_list = []
        row_counter = 0
        max_columns = 15
        column_iteration = 1
        distance_matrix = [[0 for x in xrange(max_columns)] for x in xrange(max_columns)]
        for row in cursor:
            trend1 = row[trend1_column]
            trend2 = row[trend2_column]
            if trend1 not in trends_list:
                trends_list.append(trend1)
            if trend2 not in trends_list:
                trends_list.append(trend2)
            # the diagonal (0,0), (1,1), ... stays 0
            distance_matrix[row_counter][row_counter] = 0
            # populate (i,j) and (j,i) in one pass to avoid a second loop
            distance_matrix[row_counter][column_iteration] = row[distance_value_column]
            distance_matrix[column_iteration][row_counter] = row[distance_value_column]
            column_iteration = column_iteration + 1
            if column_iteration == max_columns:
                row_counter = row_counter + 1
                column_iteration = row_counter + 1
        return distance_matrix, trends_list
    except Exception:
        print traceback.format_exc()
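
# get_matrix's SQL computes, for each pair of trends, the cosine similarity
# of their entity-count vectors:
#   cos(t1, t2) = sum_e c(t1,e)*c(t2,e) / (sqrt(sum_e c(t1,e)^2) * sqrt(sum_e c(t2,e)^2))
# where c(t, e) counts tweets of trend t that contain entity e. A pure-Python
# restatement for two such vectors, given as entity -> count dicts (the
# function name is illustrative):
import math

def cosine_similarity(counts1, counts2):
    numerator = sum(counts1[e] * counts2[e] for e in counts1 if e in counts2)
    norm1 = math.sqrt(sum(c * c for c in counts1.values()))
    norm2 = math.sqrt(sum(c * c for c in counts2.values()))
    return numerator / (norm1 * norm2)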