def export_temporal_data(time_window, start_time, end_time):
    """Export per-venue check-in growth per time window to a CSV file.

    Starting at the first window boundary after ``start_time`` the function
    steps forward one window at a time while the window start is
    ``<= end_time``. For every venue id from ``get_all_foursquare_ids()`` it
    records ``max(checkinsCount) - min(checkinsCount)`` over the
    ``venue_stats`` rows whose ``time`` falls inside the window, or 0 when
    the window has no rows.

    Args:
        time_window: 'hour' (two-hour windows, first one starting at the
            next full hour), 'day' (one-day windows starting at the next
            midnight) or 'week' (one-week windows starting at midnight one
            week after the start date). Any other value produces no
            windows, so only the header and the venue names are written.
        start_time: datetime marking the (exclusive) start of the export.
        end_time: datetime; the last window may start at this instant.

    Side effects:
        Writes ``./temporal_2hour.csv`` (header row, then one row per venue
        starting with the venue name) and prints progress to stdout.
    """
    # NOTE(review): the result of this call was never used here; the call is
    # kept only in case get_all_stats() has side effects -- confirm and drop.
    get_all_stats()
    ids = get_all_foursquare_ids()
    venue_dic = {}
    for venue_id in ids.keys():
        venue_dic[venue_id] = []

    # Move cur_time to the first window boundary after start_time.
    cur_time = start_time
    if time_window == 'hour':
        time_window = timedelta(hours=2)
        cur_time = cur_time - timedelta(
            minutes=cur_time.minute,
            seconds=cur_time.second,
            microseconds=cur_time.microsecond) + timedelta(hours=1)
    elif time_window == 'day':
        time_window = timedelta(days=1)
        cur_time = cur_time - timedelta(
            hours=cur_time.hour,
            minutes=cur_time.minute,
            seconds=cur_time.second,
            microseconds=cur_time.microsecond) + timedelta(days=1)
    elif time_window == 'week':
        time_window = timedelta(weeks=1)
        cur_time = cur_time - timedelta(
            hours=cur_time.hour,
            minutes=cur_time.minute,
            seconds=cur_time.second,
            microseconds=cur_time.microsecond) + timedelta(weeks=1)

    data_type = 'checkinsCount'
    time_list = ['Plaza/Time']

    # One cursor for the whole export instead of a new connection for every
    # venue in every window.
    cursor = connect_to_mysql()
    while cur_time > start_time and cur_time <= end_time:
        print(cur_time)
        # NOTE(review): the column header is the hour of the window start,
        # which is always 0 for 'day' and 'week' windows -- confirm intent.
        time_list.append(cur_time.hour)
        window_end = cur_time + time_window
        for venue_id in venue_dic.keys():
            # NOTE(review): the statement is built by concatenation; switch
            # to bound parameters if the cursor supports them.
            sql = "select * from venue_stats where id = '" + venue_id + "' and time between '" + str(
                cur_time) + "' and '" + str(window_end) + "'"
            cursor.execute(sql)
            rows = cursor.fetchall()
            if len(rows) != 0:
                counts = [row[data_type] for row in rows]
                value = max(counts) - min(counts)
            else:
                value = 0
            venue_dic[venue_id].append(value)
        cur_time += time_window

    # The context manager guarantees the file is flushed and closed.
    with open('./temporal_2hour.csv', 'wt') as f:
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        writer.writerow(time_list)
        for key in venue_dic.keys():
            # The venue name becomes the first cell of the row; the list is
            # modified in place because it is printed again below.
            venue_dic[key].insert(0, ids[key])
            writer.writerow(venue_dic[key])

    print(data_type)
    print(time_list)
    for venue_id in ids.keys():
        print('%s %s' % (ids[venue_id], venue_dic[venue_id]))
def main():
    """Print TF-IDF results for the Instagram captions of every venue.

    For each venue id from ``get_all_foursquare_ids()`` all non-NULL
    ``caption`` values in ``plazas_instaphoto`` are concatenated into one
    document. The documents are handed to ``TFIDF.compute_tfidf`` and each
    result entry is printed with the venue name and the number of captions
    that were found for it.
    """
    foursquare_ids = get_all_foursquare_ids()
    print(foursquare_ids)
    cursor = connect_to_mysql()
    docs = []
    names = []
    comments_count = []
    for venue_id in foursquare_ids:
        # NOTE(review): the statement is built by concatenation; switch to
        # bound parameters if the cursor supports them.
        sql = """select caption from plazas_instaphoto where caption is not NULL and foursquare_venue_id='""" + venue_id + "'"
        cursor.execute(sql)
        rows = cursor.fetchall()
        # Join all pieces once instead of growing the string piece by piece;
        # iterating a caption yields the same pieces the old loop appended.
        docs.append("".join(piece for row in rows for piece in row['caption']))
        names.append(foursquare_ids[venue_id])
        comments_count.append(len(rows))

    tfidf_scores = TFIDF().compute_tfidf(docs)
    # Index-based on purpose: the container type returned by compute_tfidf
    # is not visible here, only that it supports len() and integer indexing.
    for i in range(len(tfidf_scores)):
        print('Plaza name : ' + names[i].encode('utf-8', 'ignore'))
        print('Comments for this Plaza : ' + str(comments_count[i]))
        print(tfidf_scores[i])
def main():
    """Compute and print TF-IDF results over per-venue caption documents.

    One document is built per venue id returned by
    ``get_all_foursquare_ids()`` by concatenating every non-NULL ``caption``
    stored for that venue in ``plazas_instaphoto``. ``TFIDF.compute_tfidf``
    receives the list of documents; for every entry of its result the venue
    name, the number of captions and the entry itself are printed.
    """
    foursquare_ids = get_all_foursquare_ids()
    print(foursquare_ids)
    cursor = connect_to_mysql()
    docs = []
    names = []
    comments_count = []
    for venue_id in foursquare_ids:
        # NOTE(review): the statement is built by concatenation; switch to
        # bound parameters if the cursor supports them.
        sql = """select caption from plazas_instaphoto where caption is not NULL and foursquare_venue_id='""" + venue_id + "'"
        cursor.execute(sql)
        rows = cursor.fetchall()

        # Collect the pieces first and join once; the previous code appended
        # to the document string one piece at a time.
        pieces = []
        for row in rows:
            pieces.extend(row['caption'])
        docs.append("".join(pieces))

        names.append(foursquare_ids[venue_id])
        comments_count.append(len(rows))

    tfidf_scores = TFIDF().compute_tfidf(docs)
    # Index-based on purpose: only len() and integer indexing are known to
    # work on the value returned by compute_tfidf.
    for i in range(len(tfidf_scores)):
        print('Plaza name : ' + names[i].encode('utf-8', 'ignore'))
        print('Comments for this Plaza : ' + str(comments_count[i]))
        print(tfidf_scores[i])
def checkin_update():
    """Refresh the stored check-in statistics of every Foursquare venue.

    Calls ``add_table_venue_stats()`` first, then looks up each id returned
    by ``get_all_foursquare_ids()`` through the Foursquare client and passes
    the response to ``save_venue_stats`` together with the id.
    """
    add_table_venue_stats()
    known_ids = get_all_foursquare_ids()
    api = foursquare.Foursquare(
        config.foursquare_client_id,
        client_secret=config.foursquare_client_secret)

    for fsq_id in known_ids:
        # Progress output: one line per venue before the API call.
        print(fsq_id)
        save_venue_stats(api.venues(fsq_id), fsq_id)
def instagram_test():
    """Grab Instagram photos for every known Foursquare venue.

    Calls ``add_table_venue_photo_instagram()``, creates a
    ``VenuePhotoCrawlerInstagram`` and runs ``grab_photos`` once per venue id
    from ``get_all_foursquare_ids()``, printing the id and its mapped value
    before each crawl.
    """
    add_table_venue_photo_instagram()
    photo_crawler = VenuePhotoCrawlerInstagram()
    venue_names = get_all_foursquare_ids()

    # The filter that skipped venues whose photos were already fetched is
    # disabled, so every venue is crawled on each run.
    for venue_id in venue_names.keys():
        print('%s %s' % (venue_id, venue_names[venue_id]))
        photo_crawler.grab_photos(venue_id)
def main():
    """Print the 20 highest-scoring comment words of every venue.

    For each venue id from ``get_all_foursquare_ids()`` the JSON ``comments``
    column of ``venue_photo_instagram`` is decoded, element ``[1]`` of every
    comment entry is tokenized, and words are counted per venue and overall.
    The 300 most frequent words overall are printed and excluded. Every other
    word is scored as::

        count_in_venue / (1 + ln(number of venues containing the word))

    and the top 20 ``(word, score, count)`` tuples are printed per venue.
    """
    foursquare_ids = get_all_foursquare_ids()
    print(foursquare_ids)
    venues = {}
    cursor = connect_to_mysql()
    word_doc_freq = {}
    global_word_freq = {}
    for venue_id in foursquare_ids:
        # NOTE(review): the statement is built by concatenation; switch to
        # bound parameters if the cursor supports them.
        sql = (
            """select comments from venue_photo_instagram where comments is not NULL and foursquare_venue_id='"""
            + venue_id
            + "'"
        )
        venue_name = foursquare_ids[venue_id]
        venue_counts = venues[venue_name] = {}
        cursor.execute(sql)
        for row in cursor.fetchall():
            for sentence in json.loads(row["comments"]):
                for word in tokenize(sentence[1]):
                    global_word_freq[word] = global_word_freq.get(word, 0) + 1
                    venue_counts[word] = venue_counts.get(word, 0) + 1
        # Document frequency: each venue counts once per distinct word.
        for seen_word in venue_counts:
            word_doc_freq[seen_word] = word_doc_freq.get(seen_word, 0) + 1

    ranked = sorted(global_word_freq.items(), key=lambda tup: tup[1], reverse=True)
    too_common_word = [entry[0] for entry in ranked[:300]]
    print(too_common_word)
    # The list keeps the printed order; the set gives constant-time
    # membership tests inside the scoring loop.
    excluded = set(too_common_word)

    for venue_name in venues.keys():
        words = venues[venue_name]
        word_score = []
        for word in words:
            if word in excluded:
                continue
            score = words[word] / (1 + math.log(word_doc_freq[word]))
            word_score.append((word, score, words[word]))
        print(venue_name)
        print(sorted(word_score, key=lambda tup: tup[1], reverse=True)[0:20])
def export_temporal_data(time_window, start_time, end_time):
    """Write the check-in growth of every venue, per time window, to CSV.

    The first window starts at the first boundary after ``start_time``;
    further windows follow back to back while the window start is
    ``<= end_time``. For each window and each venue id from
    ``get_all_foursquare_ids()`` the value stored is
    ``max(checkinsCount) - min(checkinsCount)`` over the matching
    ``venue_stats`` rows, or 0 when no row matches.

    Args:
        time_window: 'hour' selects two-hour windows beginning at the next
            full hour, 'day' one-day windows beginning at the next midnight,
            'week' one-week windows beginning at midnight one week after the
            start date. With any other value no window is processed and the
            CSV contains only the header plus the venue names.
        start_time: datetime, exclusive lower bound for window starts.
        end_time: datetime, inclusive upper bound for window starts.

    Side effects:
        Creates or overwrites ``./temporal_2hour.csv`` and prints progress
        and the collected values to stdout.
    """
    # NOTE(review): nothing in this function reads the return value; the call
    # stays only in case get_all_stats() has side effects -- confirm and drop.
    get_all_stats()
    ids = get_all_foursquare_ids()
    venue_dic = {}
    for venue_id in ids.keys():
        venue_dic[venue_id] = []

    # Truncate to the hour (or to midnight) and step to the next boundary.
    cur_time = start_time
    if time_window == 'hour':
        time_window = timedelta(hours=2)
        top_of_hour = cur_time - timedelta(minutes=cur_time.minute,
                                           seconds=cur_time.second,
                                           microseconds=cur_time.microsecond)
        cur_time = top_of_hour + timedelta(hours=1)
    elif time_window == 'day':
        time_window = timedelta(days=1)
        midnight = cur_time - timedelta(hours=cur_time.hour,
                                        minutes=cur_time.minute,
                                        seconds=cur_time.second,
                                        microseconds=cur_time.microsecond)
        cur_time = midnight + timedelta(days=1)
    elif time_window == 'week':
        time_window = timedelta(weeks=1)
        midnight = cur_time - timedelta(hours=cur_time.hour,
                                        minutes=cur_time.minute,
                                        seconds=cur_time.second,
                                        microseconds=cur_time.microsecond)
        cur_time = midnight + timedelta(weeks=1)

    data_type = 'checkinsCount'
    time_list = ['Plaza/Time']

    # Connect once; opening a connection per venue and per window is
    # unnecessary because the cursor can run every query.
    cursor = connect_to_mysql()
    while cur_time > start_time and cur_time <= end_time:
        print(cur_time)
        # NOTE(review): the header stores only the hour of the window start,
        # which is always 0 for 'day' and 'week' windows -- confirm intent.
        time_list.append(cur_time.hour)
        window_end = cur_time + time_window
        for venue_id in venue_dic.keys():
            # NOTE(review): the statement is built by concatenation; switch
            # to bound parameters if the cursor supports them.
            sql = "select * from venue_stats where id = '" + venue_id + "' and time between '" + str(cur_time) + "' and '" + str(window_end) + "'"
            cursor.execute(sql)
            rows = cursor.fetchall()
            if len(rows) != 0:
                counts = [row[data_type] for row in rows]
                value = max(counts) - min(counts)
            else:
                value = 0
            venue_dic[venue_id].append(value)
        cur_time += time_window

    # `with` closes the file even if writing a row fails.
    with open('./temporal_2hour.csv', 'wt') as f:
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        writer.writerow(time_list)
        for key in venue_dic.keys():
            # Prepend the venue name in place; the same lists are printed
            # below with the name included.
            venue_dic[key].insert(0, ids[key])
            writer.writerow(venue_dic[key])

    print(data_type)
    print(time_list)
    for venue_id in ids.keys():
        print('%s %s' % (ids[venue_id], venue_dic[venue_id]))
def main():
    """Rank the comment words of each venue and print the top 20.

    Comments are read from the JSON ``comments`` column of
    ``plazas_instaphoto`` for every venue id from
    ``get_all_foursquare_ids()``; element ``[1]`` of each comment entry is
    tokenized. A word's score is its count within the venue divided by
    ``1 + ln(number of venues containing it)``. The 300 most frequent words
    overall are left out of the ranking.
    """
    foursquare_ids = get_all_foursquare_ids()
    print(foursquare_ids)
    venues = {}
    cursor = connect_to_mysql()
    word_doc_freq = {}
    global_word_freq = {}

    for venue_id in foursquare_ids:
        # NOTE(review): query assembled by concatenation; bound parameters
        # would be safer if the cursor supports them.
        query = "select comments from plazas_instaphoto where comments is not NULL and foursquare_venue_id='" + venue_id + "'"
        venue_name = foursquare_ids[venue_id]
        per_venue = {}
        venues[venue_name] = per_venue
        cursor.execute(query)
        for row in cursor.fetchall():
            for entry in json.loads(row['comments']):
                for token in tokenize(entry[1]):
                    global_word_freq[token] = global_word_freq.get(token, 0) + 1
                    per_venue[token] = per_venue.get(token, 0) + 1
        # Each venue contributes one to the document frequency of every
        # distinct word it contains.
        for token in per_venue.keys():
            word_doc_freq[token] = word_doc_freq.get(token, 0) + 1

    by_total = sorted(global_word_freq.items(), key=lambda tup: tup[1], reverse=True)
    too_common_word = set(pair[0] for pair in by_total[:300])

    for venue_name in venues.keys():
        counts = venues[venue_name]
        word_score = [
            (token, counts[token] / (1 + math.log(word_doc_freq[token])), counts[token])
            for token in counts
            if token not in too_common_word
        ]
        word_score = sorted(word_score, key=lambda tup: tup[1], reverse=True)
        print(venue_name)
        print(word_score[0:20])
def main():
    """Rank the caption words of each venue and print the top 50.

    Captions come from the ``caption`` column of ``plazas_instaphoto`` for
    every venue id returned by ``get_all_foursquare_ids()`` and are tokenized
    as a whole. Each word is scored as its count within the venue divided by
    ``1 + ln(number of venues containing it)``; the 300 words with the
    highest overall count are skipped. Venue names are printed UTF-8 encoded.
    """
    foursquare_ids = get_all_foursquare_ids()
    print(foursquare_ids)
    venues = {}
    cursor = connect_to_mysql()
    word_doc_freq = {}
    global_word_freq = {}

    for venue_id in foursquare_ids:
        # NOTE(review): query assembled by concatenation; bound parameters
        # would be safer if the cursor supports them.
        query = "select caption from plazas_instaphoto where caption is not NULL and foursquare_venue_id='" + venue_id + "'"
        venue_name = foursquare_ids[venue_id]
        tally = venues[venue_name] = {}
        cursor.execute(query)
        rows = cursor.fetchall()
        for row in rows:
            for token in tokenize(row['caption']):
                global_word_freq.setdefault(token, 0)
                global_word_freq[token] += 1
                tally.setdefault(token, 0)
                tally[token] += 1
        # One document-frequency increment per distinct word of this venue.
        for token in tally.keys():
            word_doc_freq.setdefault(token, 0)
            word_doc_freq[token] += 1

    too_common_word = set()
    most_frequent = sorted(global_word_freq.items(), key=lambda tup: tup[1], reverse=True)
    for token, _total in most_frequent[:300]:
        too_common_word.add(token)

    for venue_name in venues.keys():
        tally = venues[venue_name]
        word_score = []
        for token in tally:
            if token in too_common_word:
                continue
            weight = tally[token] / (1 + math.log(word_doc_freq[token]))
            word_score.append((token, weight, tally[token]))
        word_score.sort(key=lambda tup: tup[1], reverse=True)
        print(venue_name.encode('utf-8', 'ignore'))
        print(word_score[0:50])