def ch(dis, loc, SSS): location3 = loc + SSS + '/3h/graph/' location6 = loc + SSS + '/6h/graph/' location12 = loc + SSS + '/12h/graph/' try: mg3 = nx.read_graphml(location3 + dis + ".graphml") mg6 = nx.read_graphml(location6 + dis + ".graphml") mg12 = nx.read_graphml(location12 + dis + ".graphml") db = sqlite3.connect(loc + SSS + '/SQLite/' + SSS + '_' + dis) c = db.cursor() c.execute("SELECT * FROM NODES order by date") db.commit() results = c.fetchall() print SSS + " : " + dis + ' 3' print SSS + " : " + dis + ' 6' print SSS + " : " + dis + ' 12' print '----------------------------------------' return (str(mg3.number_of_nodes()), str(mg6.number_of_nodes()), str(mg12.number_of_nodes()), str(mg3.number_of_edges()), str(mg6.number_of_edges()), str(mg12.number_of_edges())) except: print SSS + " : " + dis + "(No such graph)" print '----------------------------------------' return ("-", "-", "-", "-", "-", "-")
def ch(dis, loc, SSS): location3 = loc + SSS + '/3h/graph/' location6 = loc + SSS + '/6h/graph/' location12 = loc + SSS + '/12h/graph/' try: mg3 = nx.read_graphml(location3 + dis + ".graphml") mg6 = nx.read_graphml(location6 + dis + ".graphml") mg12 = nx.read_graphml(location12 + dis + ".graphml") db = sqlite3.connect(loc + SSS + '/SQLite/' + SSS + '_' + dis) c = db.cursor() c.execute("SELECT * FROM NODES order by date") db.commit() results = c.fetchall() print dis + ' 3 : ' + str(len(results)) + ' ---> (' + str( mg3.number_of_nodes()) + ') CC : ' + str( nx.average_clustering(mg3)) print dis + ' 6 : ' + str(len(results)) + ' ---> (' + str( mg6.number_of_nodes()) + ') CC : ' + str( nx.average_clustering(mg6)) print dis + ' 12: ' + str(len(results)) + ' ---> (' + str( mg12.number_of_nodes()) + ') CC : ' + str( nx.average_clustering(mg12)) print '----------------------------------------' except: print dis + " No such graph" print '----------------------------------------'
def create(disease, TIME, location):
    """Build the co-occurrence weight matrix for `disease` and write its
    degree file via put_into_Graph.

    TIME is a digit-string time offset (e.g. '000030000') consumed by
    add_time; location is the output directory for the degree file.
    Relies on module globals `loc`, `SSS`, `LLL` and helpers firstdate,
    lastdate, add_time, next_event, loop, put_into_Graph, fib2.
    """
    ddb = sqlite3.connect(loc + SSS + '/SQLite/' + SSS + '_' + disease)
    cc = ddb.cursor()
    heart_array = []
    heart2_array = []
    cc.execute("SELECT * FROM NODES order by date")
    ddb.commit()
    results = cc.fetchall()
    # LOOP ONE: mark which sequential row ids mention `disease`
    # (row[2] is the DISEASES text column); non-matching rows get 0.
    g_id = 0
    for row in results:
        # heart
        if (disease in row[2]):
            heart_array.append(g_id)
        else:
            heart_array.append(0)
        g_id += 1
    print g_id
    print 'finish LOOP ONE'
    # BigArray_heart = [[0 for j in range(g_id)] for i in range(g_id)]
    # NOTE(review): np.empty leaves the matrix uninitialized (the
    # commented-out alternatives zero it) — presumably `loop` overwrites
    # every cell it reads; confirm.  Also np.int is removed in
    # numpy >= 1.20; this requires an old numpy.
    BigArray_heart = np.empty((g_id, g_id), dtype=np.int)
    # BigArray_heart = np.zeros((g_id,g_id))
    # Defining a window of 3 hours
    # window = '000030000'
    window = TIME
    in_case = '000001000'
    limit_1 = firstdate(ddb, cc)
    window_time = add_time(limit_1, window)
    limit_2 = add_time(limit_1, in_case)
    last_tweet_date = lastdate(ddb, cc)
    # Warm-up pass: grow the window end (limit_2) in in_case steps until
    # one full window is covered, accumulating pairs via loop().
    while True:
        cc.execute("select * from nodes where date between " +
                   str(limit_1) + " and " + str(limit_2) +
                   " order by date")
        ddb.commit()
        rs = cc.fetchall()
        if len(rs) > 1:
            mn = rs[0][0]
            mx = rs[len(rs) - 1][0]
            fff = mx - mn
            # skip windows spanning too many ids (burst guard)
            if fff < 10000:
                print str(mn) + '--->' + str(mx)
                loop(mn, mx, heart_array, heart2_array, BigArray_heart)
                heart2_array = []
        limit_2 = add_time(limit_2, in_case)
        if limit_2 > window_time:
            break
    print "FINISH @!"
    # Sliding pass: advance both window edges until the last tweet.
    limit_1 = next_event(limit_1, ddb, cc)
    while True:
        cc.execute("select * from nodes where date between " +
                   str(limit_1) + " and " + str(limit_2) +
                   " order by date")
        ddb.commit()
        rs = cc.fetchall()
        if len(rs) > 1:
            mn = rs[0][0]
            mx = rs[len(rs) - 1][0]
            # clamp to matrix bounds
            if mx > g_id:
                mx = g_id
            fff = mx - mn
            # LLL is a module-level burst cap (e.g. 1500)
            if (fff < LLL) and (fff > 0):
                loop(mn, mx, heart_array, heart2_array, BigArray_heart)
                heart2_array = []
                print str(mn) + '--->' + str(mx) + ' = ' + str(fff)
        limit_1 = add_time(limit_1, in_case)
        limit_2 = add_time(limit_1, window)
        if limit_1 >= last_tweet_date:
            break
    print "////////////////////////////////////////////////////////////////"
    print 'enter Large Loop'
    # fib2.loop reduces the matrix to edge lists + max weight
    fibReturn = fib2.loop(g_id, BigArray_heart)
    weight_heart = fibReturn[0]
    from_X_heart = fibReturn[1]
    to_y_heart = fibReturn[2]
    maxB_heart = fibReturn[3]
    print "////////////////////////////////////////////////////////////////"
    print 'Finish i,j loops'
    print "////////////////////////////////////////////////////////////////"
    Heart = open(location + '/degree/' + disease + '.txt', 'w')
    put_into_Graph(weight_heart, maxB_heart, from_X_heart, to_y_heart,
                   disease, Heart, location)
    log = open(loc + 'log.txt', 'a')
    log.write("finish .... " + SSS + "_" + disease + " TW= " + TIME + "\n")
    print "finish " + SSS + "_" + disease
__author__ = 'Abduljaleel'
from db_sqlite3 import sqlite3
import networkx as nx
import csv
import numpy as np
import fib
# Module-level connection to the partial Florida database; the helpers
# below use this shared `db`/`c` pair.
db = sqlite3.connect('FLdb_Partial')
c = db.cursor()
location = '/Users/Abduljaleel/Desktop/project'
edge_weight = csv.writer(open(location + "/edge_weight.csv", "wb"))
def firstdate():
    """Return the smallest DATE value in NODES (earliest tweet)."""
    sql = "SELECT min(date) FROM NODES"
    try:
        c.execute(sql)
        db.commit()
    except:
        # NOTE(review): bare except silently hides query failures;
        # fetchall below would then return stale/empty results.
        db.rollback()
    results = c.fetchall()
    for r in results:
        first = r[0]
    return first
def lastdate():
    """Return the largest DATE value in NODES (latest tweet).
    NOTE: definition truncated in this view — body continues elsewhere."""
    sql = "SELECT max(date) FROM NODES"
    try:
#connecting to MongoDB 1 connection = Connection() database = connection.twit_filtered8 t_mongo = database.tdf8 twit = t_mongo.find() print twit.count() #connecting to MongoDB 2 db_usa = connection.twit_usa t_u = db_usa.tu tw = t_u.find() print tw.count() #small SQLite DB for the counter db = sqlite3.connect('counterdb') c = db.cursor() # SQLite DB for matching loc and address db_match = sqlite3.connect('matchdb') c_m = db_match.cursor() # Sqlite DB for locstr db_locstr = sqlite3.connect('locstr') c_loc = db_locstr.cursor() def abv_to_state(kk): if kk == "al": return "Alabama" if kk == "ak": return "Alaska"
def do(sss): t_uf = dbf.tu tw = t_uf.find() SSS = sss location = '/Users/Abduljaleel/Desktop/project/' + SSS + '/SQLite/' heart = "heart" cancer = "cancer" clrd = "clrd" stroke = "stroke" alz = "alzheimer" diabetes = "diabetes" flupne = "flu_or_pneumonia" kidney = "kidney" septicemia = "septicemia" liver = "liver" hyper = "hyper" parkinson = "parkinson" db1 = sqlite3.connect(location + SSS + '_' + heart) c1 = db1.cursor() db2 = sqlite3.connect(location + SSS + '_' + cancer) c2 = db2.cursor() db3 = sqlite3.connect(location + SSS + '_' + clrd) c3 = db3.cursor() db4 = sqlite3.connect(location + SSS + '_' + stroke) c4 = db4.cursor() db5 = sqlite3.connect(location + SSS + '_' + alz) c5 = db5.cursor() db6 = sqlite3.connect(location + SSS + '_' + diabetes) c6 = db6.cursor() db7 = sqlite3.connect(location + SSS + '_' + flupne) c7 = db7.cursor() db8 = sqlite3.connect(location + SSS + '_' + kidney) c8 = db8.cursor() db9 = sqlite3.connect(location + SSS + '_' + septicemia) c9 = db9.cursor() db10 = sqlite3.connect(location + SSS + '_' + liver) c10 = db10.cursor() db11 = sqlite3.connect(location + SSS + '_' + hyper) c11 = db11.cursor() db12 = sqlite3.connect(location + SSS + '_' + parkinson) c12 = db12.cursor() sql = "CREATE TABLE NODES (ID INTEGER, USER TEXT,DISEASES TEXT, DATE INTEGER)" c1.execute("DROP TABLE IF EXISTS NODES") c1.execute(sql) c2.execute("DROP TABLE IF EXISTS NODES") c2.execute(sql) c3.execute("DROP TABLE IF EXISTS NODES") c3.execute(sql) c4.execute("DROP TABLE IF EXISTS NODES") c4.execute(sql) c5.execute("DROP TABLE IF EXISTS NODES") c5.execute(sql) c6.execute("DROP TABLE IF EXISTS NODES") c6.execute(sql) c7.execute("DROP TABLE IF EXISTS NODES") c7.execute(sql) c8.execute("DROP TABLE IF EXISTS NODES") c8.execute(sql) c9.execute("DROP TABLE IF EXISTS NODES") c9.execute(sql) c10.execute("DROP TABLE IF EXISTS NODES") c10.execute(sql) c11.execute("DROP TABLE IF EXISTS NODES") c11.execute(sql) c12.execute("DROP TABLE IF EXISTS NODES") c12.execute(sql) n = 0 n1 = 0 
n2 = 0 n3 = 0 n4 = 0 n5 = 0 n6 = 0 n7 = 0 n8 = 0 n9 = 0 n10 = 0 n11 = 0 n12 = 0 for row in tw: state = row['state'].strip() # if (state == SSS): user = row['data']['user']['screen_name'] tt = row['data']['text'].lower() txt = distext(row['data']['text'].lower()) date = disdate(row['data']['created_at']) if state != '': if (is_heart(tt)): try: sql = "INSERT INTO NODES VALUES (" + str( n1) + ",'" + user + "','" + txt + "'," + str( date) + ")" c1.execute(sql) db1.commit() n1 += 1 except: db1.rollback() if (is_cancer(tt)): try: sql = "INSERT INTO NODES VALUES (" + str( n2) + ",'" + user + "','" + txt + "'," + str( date) + ")" c2.execute(sql) db2.commit() n2 += 1 except: db2.rollback() if (is_clrd(tt)): try: sql = "INSERT INTO NODES VALUES (" + str( n3) + ",'" + user + "','" + txt + "'," + str( date) + ")" c3.execute(sql) db3.commit() n3 += 1 except: db3.rollback() if (is_stroke(tt)): try: sql = "INSERT INTO NODES VALUES (" + str( n4) + ",'" + user + "','" + txt + "'," + str( date) + ")" c4.execute(sql) db4.commit() n4 += 1 except: db4.rollback() if (is_alzheimer(txt)): try: sql = "INSERT INTO NODES VALUES (" + str( n5) + ",'" + user + "','" + txt + "'," + str( date) + ")" c5.execute(sql) db5.commit() n5 += 1 except: db5.rollback() if (is_diabetes(tt)): try: sql = "INSERT INTO NODES VALUES (" + str( n6) + ",'" + user + "','" + txt + "'," + str( date) + ")" c6.execute(sql) db6.commit() n6 += 1 except: db6.rollback() if (is_flu_or_pneumonia(tt)): try: sql = "INSERT INTO NODES VALUES (" + str( n7) + ",'" + user + "','" + txt + "'," + str( date) + ")" c7.execute(sql) db7.commit() n7 += 1 except: db7.rollback() if (is_kidney(tt)): try: sql = "INSERT INTO NODES VALUES (" + str( n8) + ",'" + user + "','" + txt + "'," + str( date) + ")" c8.execute(sql) db8.commit() n8 += 1 except: db8.rollback() if (is_septicemia(tt)): try: sql = "INSERT INTO NODES VALUES (" + str( n9) + ",'" + user + "','" + txt + "'," + str( date) + ")" c9.execute(sql) db9.commit() n9 += 1 except: 
db9.rollback() if (is_liver(tt)): try: sql = "INSERT INTO NODES VALUES (" + str( n10) + ",'" + user + "','" + txt + "'," + str( date) + ")" c10.execute(sql) db10.commit() n10 += 1 except: db10.rollback() if (is_hyper(tt)): try: sql = "INSERT INTO NODES VALUES (" + str( n11) + ",'" + user + "','" + txt + "'," + str( date) + ")" c11.execute(sql) db11.commit() n11 += 1 except: db11.rollback() if (is_parkinson(tt)): try: sql = "INSERT INTO NODES VALUES (" + str( n12) + ",'" + user + "','" + txt + "'," + str( date) + ")" c12.execute(sql) db12.commit() n12 += 1 except: db12.rollback() n += 1 print n print "---------------------------------------------------------------------------" print str(n1) + " " + heart print "---------------------------------------------------------------------------" print str(n2) + " " + cancer print "---------------------------------------------------------------------------" print str(n3) + " " + clrd print "---------------------------------------------------------------------------" print str(n4) + " " + stroke print "---------------------------------------------------------------------------" print str(n5) + " " + alz print "---------------------------------------------------------------------------" print str(n6) + " " + diabetes print "---------------------------------------------------------------------------" print str(n7) + " " + flupne print "---------------------------------------------------------------------------" print str(n8) + " " + kidney print "---------------------------------------------------------------------------" print str(n9) + " " + septicemia print "---------------------------------------------------------------------------" print str(n10) + " " + liver print "---------------------------------------------------------------------------" print str(n11) + " " + hyper print "---------------------------------------------------------------------------" print str(n12) + " " + parkinson print 
"---------------------------------------------------------------------------"
__author__ = 'Abduljaleel'
from db_sqlite3 import sqlite3
import networkx as nx
import csv
import numpy as np
import fib
# Module configuration: target state and burst cap (window span limit).
SSS = 'New York'
LLL = 1500
# Shared connection used by the date helpers below; the DB file is named
# after the state.
db = sqlite3.connect(SSS)
c = db.cursor()
location = '/Users/Abduljaleel/Desktop/project/'+SSS+'/3h'
edge_weight = csv.writer(open(location+"/edge_weight.csv", "wb"))
def firstdate():
    """Return the smallest DATE value in NODES (earliest tweet)."""
    sql = "SELECT min(date) FROM NODES"
    try:
        c.execute(sql)
        db.commit()
    except:
        # NOTE(review): bare except hides query errors; fetchall below
        # would then yield nothing and `first` be unbound.
        db.rollback()
    results = c.fetchall()
    for r in results:
        first = r[0]
    return first
def build(Time, dis): location = '/Users/Abduljaleel/Desktop/project/USA/' + Time + '/' + dis + '/' db = '/Users/Abduljaleel/Desktop/project/USA/SQLite/USA_' + dis ddb = sqlite3.connect(db) cc = ddb.cursor() twit_line = [] heart2_array = [] cc.execute("SELECT * FROM NODES order by date") ddb.commit() results = cc.fetchall() all = len(results) window = '000010000' in_case = '000001500' limit_1 = firstdate(ddb, cc) window_time = add_time(limit_1, window) limit_2 = add_time(limit_1, in_case) last_tweet_date = lastdate(ddb, cc) tt1 = strftime("%a, %d %b %Y %X +0000", gmtime()) while True: cc.execute("select * from nodes where date between " + str(limit_1) + " and " + str(limit_2) + " order by date") ddb.commit() rs = cc.fetchall() aa = [] for i in range(0, len(rs)): aa.append(rs[i][0]) # for j in range (0,len(aa)): # for jj in range (j+1,len(aa)): # insert_edge(Net1,aa[j],aa[jj]) # # print str(aa[j])+','+str(aa[jj]) fib3.loop(Net1, aa) limit_2 = add_time(limit_2, in_case) if limit_2 > window_time: break limit_1 = next_event(limit_1, ddb, cc) while True: cc.execute("select * from nodes where date between " + str(limit_1) + " and " + str(limit_2) + " order by date") ddb.commit() rs = cc.fetchall() aa = [] try: d = int(rs[0][0]) e = float(d * 100) / all print int(e) except: pr = 1 for i in range(0, len(rs)): aa.append(rs[i][0]) # for j in range (0,len(aa)): # for jj in range (j+1,len(aa)): # insert_edge(Net1,aa[j],aa[jj]) # # print str(aa[j])+','+str(aa[jj]) fib3.loop(Net1, aa) limit_1 = add_time(limit_1, in_case) limit_2 = add_time(limit_1, window) if limit_1 >= last_tweet_date: break # print 'start creating' # nx.write_graphml(Net1, location+"Net1.graphml") print 'finish creating' # k =nx.read_graphml(location+"Net1.graphml") # # k = igraph.read(location+"test.graphml") # a = Net1.get_edgelist() # print k.edge[0][1]['weight'] # c3=str(nx.average_clustering(Net1)) X = [] Y = [] W = [] a = Net1.edges() fib3.app(X, Y, W, a, Net1) # for i in range (0,len(a)-1): # x = 
a[i][0] # y = a[i][1] # w = Net1.edge[x][y]['weight'] # # print '('+str(x)+','+str(y)+','+str(w)+')' # X.append(x) # Y.append(y) # W.append(w) # fib3.create(X,Y,W,Net2) mx = max(W) cc = open(location + 'Net1.csv', 'a') print 'start writing to csv' for i in range(0, len(W)): norm = float(W[i]) / mx if (norm >= 0.8): Net2.add_edge(str(X[i]), str(Y[i])) cc.write(str(X[i]) + '\t' + str(Y[i]) + '\n') print str(X[i]) + ',' + str(Y[i]) 'finish writing to csv' print '------degree start' deg = open(location + 'degree.txt', 'a') # st = open(location+'statistics.txt', 'a') # DD = open(location+'DD.txt', 'a') # # # Net22 = igraph.Graph(Net2) # # data = [] for s in nx.degree(Net2): deg.write(str(nx.degree(Net2, s)) + "\n") # data.append(nx.degree(Net2, s)) # # deg.write(str(data[i])+'\n') # # for i in range (0,max(data)): # j = i+1 # count = 0 # for k in range (0,len(data)): # if data[k] == j: # count +=1 # DD.write(str(j)+'\t'+str(count)+'\n') # print 'cluster start' # cluster = nx.average_clustering(Net2) # print 'pl start' # pl = Net22.average_path_length() # n=0 # summ=0 # for g in nx.connected_component_subgraphs(Net2): # summ+=float(nx.average_shortest_path_length(g)) # n+=1 # summ = float(summ)/n # # # # st.write('cluster :'+str(cluster)+'\n') # st.write('path_Len :'+str(summ)+'\n') # nx.write_graphml(Net2, location+"Net2.graphml") # Net2.write_graphml(location+dis+"Net2.graphml") st = open(location + 'log.txt', 'a') st.write(str(tt1) + '\n') st.write(str(strftime("%a, %d %b %Y %X +0000", gmtime())) + '\n') st.write(str('------------') + '\n')
def build(Time, dis, window, in_case): location = '/Users/Abduljaleel/Desktop/project/USA/' + Time + '/' + dis + '/' location_Time = '/Users/Abduljaleel/Desktop/project/USA/' + Time + '/' st = open(location_Time + 'log_where.txt', 'a') st.write(str(dis + '_' + Time) + '\n') db = '/Users/Abduljaleel/Desktop/project/USA/SQLite/USA_' + dis ddb = sqlite3.connect(db) cc = ddb.cursor() cc.execute("SELECT * FROM NODES order by date") ddb.commit() results = cc.fetchall() all = len(results) limit_1 = firstdate(ddb, cc) window_time = add_time(limit_1, window) limit_2 = add_time(limit_1, in_case) last_tweet_date = lastdate(ddb, cc) start = time.time() w = 1 while True: cc.execute("select * from nodes where date between " + str(limit_1) + " and " + str(limit_2) + " order by date") ddb.commit() rs = cc.fetchall() aa = [] for i in range(0, len(rs)): aa.append(rs[i][0]) w = loop(Net1, aa, w) limit_2 = add_time(limit_2, in_case) if limit_2 > window_time: break limit_1 = next_event(limit_1, ddb, cc) w = 1 while True: cc.execute("select * from nodes where date between " + str(limit_1) + " and " + str(limit_2) + " order by date") ddb.commit() rs = cc.fetchall() aa = [] try: d = int(rs[0][0]) e = float(d * 100) / all print int(e) except: pr = 1 for i in range(0, len(rs)): aa.append(rs[i][0]) w = loop(Net1, aa, w) limit_1 = add_time(limit_1, in_case) limit_2 = add_time(limit_1, window) if limit_1 >= last_tweet_date: break print 'finish creating' threshold = w * 0.79 H = nx.Graph([(u, v, d) for (u, v, d) in Net1.edges_iter(data=True) if d['weight'] > threshold]) print '------degree start------' deg = open(location + 'degree.txt', 'w') DD = open(location + 'DD.txt', 'w') data = [] DDD = H.degree() for s in DDD: deg.write(str(nx.degree(H, s)) + "\n") data.append(nx.degree(H, s)) for i in range(0, max(data)): j = i + 1 count = 0 for k in range(0, len(data)): if data[k] == j: count += 1 DD.write(str(j) + '\t' + str(count) + '\n') # st = open(location+'statistics.txt', 'a') # print 'cluster 
start' # cluster = nx.average_clustering(Net2) # print 'pl start' # pl = Net22.average_path_length() # n=0 # summ=0 # for g in nx.connected_component_subgraphs(Net2): # summ+=float(nx.average_shortest_path_length(g)) # n+=1 # summ = float(summ)/n # st.write('cluster :'+str(cluster)+'\n') # st.write('path_Len :'+str(summ)+'\n') nx.write_graphml(H, location + dis + "_graph.graphml") sts = open(location_Time + 'log_sec.txt', 'a') end = time.time() - start sts.write(str(dis + '_' + Time) + '\t' + str(end) + '\n')
def build(state,Time,dis,window,in_case): added_vertices = set() db = '/Users/Abduljaleel/Desktop/project1/'+state+'/SQLite/'+state+'_'+dis ddb = sqlite3.connect(db) cc = ddb.cursor() cc.execute("SELECT * FROM NODES order by date") ddb.commit() results = cc.fetchall() all = len(results) limit_1 = firstdate(ddb,cc) window_time = add_time(limit_1, window) limit_2 = add_time(limit_1, in_case) last_tweet_date = lastdate(ddb,cc) if (all>=30): while True: cc.execute("select * from nodes where date between " + str(limit_1) + " and " + str(limit_2) + " order by date") ddb.commit() rs = cc.fetchall() aa=[] for i in range (0,len(rs)): aa.append(rs[i][0]) fib4.loop(Net1,aa,added_vertices) limit_2 = add_time(limit_2, in_case) if limit_2 > window_time: break limit_1 = next_event(limit_1,ddb,cc) while True: cc.execute("select * from nodes where date between " + str(limit_1) + " and " + str(limit_2) + " order by date") ddb.commit() rs = cc.fetchall() aa=[] try: d = int(rs[0][0]) e = float(d*100)/all print int(e) except: pr=1 for i in range (0,len(rs)): aa.append(rs[i][0]) fib4.loop(Net1,aa,added_vertices) limit_1 = add_time(limit_1, in_case) limit_2 = add_time(limit_1, window) if limit_1 >= last_tweet_date: break added_vertices = set() print 'finish creating' # print '------degree start------' # location = '/Users/Abduljaleel/Desktop/project/degrees/'+state+'/'+Time # de = open(location+'/degree/'+dis+'_degree.txt','a') # wde = open(location+'/weighted_degree/'+dis+'_wd.txt','a') # # degrees(Net1,de) # weighted_degree(Net1,wde) print '------Writing Graphml start------'+ state+'---'+Time+'----'+dis # nx.write_graphml(Net1, '/Users/Abduljaleel/Desktop/project/graphs/'+state+'/'+Time+'/'+dis+'_'+Time+'.graphml') Net1.write_graphml('/Users/Abduljaleel/Desktop/project/graphs/'+state+'/'+Time+'/'+dis+'_'+Time+'.graphml')
from pymongo import *
from db_sqlite3 import sqlite3
import sys
# Force UTF-8 as the default codec so tweet text round-trips without
# explicit encode/decode calls (Python 2 idiom).
reload(sys)
sys.setdefaultencoding("utf8")
# connecting to MongoDB 1 (THE MAIN DB - USA)
connection = Connection()
db = connection.twit_usa
t_u = db.tu
tw = t_u.find()
print tw.count()
# SQLite DB for matching loc and address
db_match = sqlite3.connect('matchdb')
c_m = db_match.cursor()
def abv_to_state(kk):
    """Map a two-letter US state abbreviation (any case) to its full
    name.  NOTE: definition truncated in this view — remaining states
    follow elsewhere."""
    kk = kk.lower()
    if kk == "al":
        return "Alabama"
    if kk == "ak":
        return "Alaska"
    if kk == "az":
        return "Arizona"
    if kk == "ar":
        return "Arkansas"
    if kk == "ca":
        return "California"
# NOTE(review): this chunk references `g_id`, `BigArray`, `disease` and
# `location` defined earlier (not visible here) — presumably the tail of
# a create()-style builder followed by the script's module-level config.
print "////////////////////////////////////////////////////////////////"
print 'enter Large Loop'
# fib2.loop reduces the weight matrix to edge lists + max weight
fibReturn = fib2.loop(g_id, BigArray)
weight = fibReturn[0]
from_X = fibReturn[1]
to_y = fibReturn[2]
maxB = fibReturn[3]
print "////////////////////////////////////////////////////////////////"
print 'Finish i,j loops'
print "////////////////////////////////////////////////////////////////"
deg = open(location + '/degree/' + disease + '.txt', 'w')
put_into_Graph(weight, maxB, from_X, to_y, disease, deg)
print "finish " + disease
# ---- module-level configuration for the North Dakota run ----
SSS = 'North Dakota'
LLL = 1500  # burst cap used by the window loops
GI = nx.Graph()
db = sqlite3.connect('/Users/Abduljaleel/Desktop/project/SQLite/' + SSS)
c = db.cursor()
location = '/Users/Abduljaleel/Desktop/project/' + SSS + '/3h'
edge_weight = csv.writer(open(location + "/edge_weight.csv", "wb"))
# kick off the heart run with a 3h window offset
create('heart', '000030000')
__author__ = 'Abduljaleel'
from pymongo import *
from db_sqlite3 import sqlite3

# Connect to the USA tweet collection in MongoDB.
connection = Connection()
dbf = connection.twit_usa
t_uf = dbf.tu
tw = t_uf.find()

# SQLite store for this run.
db = sqlite3.connect('USA')
c = db.cursor()

# Substring markers checked in the same order as the original boolean
# chain (any() short-circuits identically).
_HEART_TERMS = (
    "heart disease", "diseases of heart", "coronary artery",
    "heart failure", "heart attack", "acute coronary", "angina",
    "atrial fib", "arrhythmias", "atherosclerotic cardiovascular",
    "congenital heart", "peripheral arterial disease", "pericardial",
    "myocardial infarction", "endocarditis", "pericardium",
    "myocarditis", "cardiac arrest", "congestive heart",
    "heart block", "chd", "ihd", "cad",
)

_STROKE_TERMS = ("stroke", "brain attack", "cerebrovascular")


def is_heart(txt):
    """Return True when `txt` contains any heart-disease marker phrase."""
    return any(term in txt for term in _HEART_TERMS)


def is_stroke(txt):
    """Return True when `txt` contains any stroke marker phrase."""
    return any(term in txt for term in _STROKE_TERMS)
__author__ = 'Abduljaleel'
from pymongo import *
from db_sqlite3 import sqlite3

# Connect to the Florida tweet collection in MongoDB.
connection = Connection()
dbf = connection.florida
t_uf = dbf.fl
tw = t_uf.find()

# SQLite store for the pneumonitis subset.
db = sqlite3.connect('FLdb_Partial10_penumonitis')
c = db.cursor()

# Aspiration-pneumonitis marker phrases, checked in the original order.
_PNEUMONITIS_TERMS = (
    "aspiration pneumonia", "pulmonary aspiration",
    "inhalation pneumonia", "endotracheal aspiration",
)


def is_penumonitis(txt):
    """Return True when `txt` contains an aspiration-pneumonitis phrase."""
    return any(term in txt for term in _PNEUMONITIS_TERMS)


def distext(tweet):
    """Return the label "penumonitis " when the tweet matches, else ""."""
    return "penumonitis " if is_penumonitis(tweet) else ''