def open_cassandra_session():
    """Prepare the environment, configure the Cassandra connection and sync tables.

    Calls ``setup_env()``, then ``connection.setup`` with the ``ip`` and
    ``keyspace`` entries of ``CASSANDRA_CONFIG`` (protocol version 3), and
    finally invokes ``sync_table()`` on each raw/source entity class.
    """
    setup_env()

    hosts = [CASSANDRA_CONFIG['ip']]
    keyspace = CASSANDRA_CONFIG['keyspace']
    connection.setup(hosts, keyspace, protocol_version=3)

    # Order is kept identical to the original sequence of sync_table() calls.
    entity_classes = (
        SourceUserEntity,
        RawRecentMediaEntity,
        RawUserEntity,
        RawUserRecentMediaEntity,
    )
    for entity_cls in entity_classes:
        entity_cls.sync_table()
def produce_raw_layer():
    """Run one ingestion cycle of recent popular media into the raw layer.

    Fetches up to 64 popular media items from ``api``, parses each one with
    ``RawRecentMediaEntity.parse`` and keeps only items whose caption is
    detected as English. Kept items are saved, their user information is
    processed by ``handle_user_info``, and the cycle counters are passed to
    ``log_run_metrics`` once the loop has finished.

    Any exception raised during the cycle is caught and printed; nothing is
    re-raised, so the caller is not interrupted (best-effort behavior of the
    original is preserved).
    """
    try:
        # Parenthesized single-argument print behaves the same on Python 2
        # and also parses on Python 3.
        print('START: Insert of data into database at %s.' % datetime.datetime.now())
        cycle_start_time = datetime.datetime.now()

        recent_media_added = 0
        users_added = 0
        users_updated = 0
        user_recent_media_added = 0

        # Get recent popular media
        recent_media = api.media_popular(count=64)
        for media in recent_media:
            # Parse the recent popular media
            parsed_media = RawRecentMediaEntity.parse(media)

            # Determine if english speaking user, if so, continue
            ## TODO: Maybe detect all possible languages and then if 'en' is in it, it passes
            try:
                if langdetect.detect(parsed_media.caption_text) != 'en':
                    continue
            except LangDetectException:
                # Language could not be detected for this caption: skip the item.
                continue

            # Save the parsed media
            parsed_media.save()
            recent_media_added += 1

            user_recent_media_added, users_added, users_updated = handle_user_info(
                parsed_media, user_recent_media_added, users_added, users_updated)

        log_run_metrics(cycle_start_time, recent_media_added, users_added,
                        users_updated, user_recent_media_added)
    except Exception as e:
        # The previous message contained a '%d' placeholder that was never
        # filled (no user id is in scope here), so it printed a literal '%d'.
        print("ERROR - produce_raw_layer caused error: " + str(e))
def produce_raw_layer():
    """Run one ingestion cycle of recent popular media into the raw layer.

    Fetches up to 64 popular media items from ``api``, parses each one with
    ``RawRecentMediaEntity.parse`` and keeps only items whose caption is
    detected as English. Kept items are saved, their user information is
    processed by ``handle_user_info``, and the cycle counters are passed to
    ``log_run_metrics`` once the loop has finished.

    Any exception raised during the cycle is caught and printed; nothing is
    re-raised, so the caller is not interrupted (best-effort behavior of the
    original is preserved).
    """
    try:
        # Parenthesized single-argument print behaves the same on Python 2
        # and also parses on Python 3.
        print('START: Insert of data into database at %s.' %
              datetime.datetime.now())
        cycle_start_time = datetime.datetime.now()

        recent_media_added = 0
        users_added = 0
        users_updated = 0
        user_recent_media_added = 0

        # Get recent popular media
        recent_media = api.media_popular(count=64)
        for media in recent_media:
            # Parse the recent popular media
            parsed_media = RawRecentMediaEntity.parse(media)

            # Determine if english speaking user, if so, continue
            ## TODO: Maybe detect all possible languages and then if 'en' is in it, it passes
            try:
                if langdetect.detect(parsed_media.caption_text) != 'en':
                    continue
            except LangDetectException:
                # Language could not be detected for this caption: skip the item.
                continue

            # Save the parsed media
            parsed_media.save()
            recent_media_added += 1

            user_recent_media_added, users_added, users_updated = handle_user_info(
                parsed_media, user_recent_media_added, users_added, users_updated)

        log_run_metrics(cycle_start_time, recent_media_added, users_added,
                        users_updated, user_recent_media_added)
    except Exception as e:
        # The previous message contained a '%d' placeholder that was never
        # filled (no user id is in scope here), so it printed a literal '%d'.
        print("ERROR - produce_raw_layer caused error: " + str(e))