def tweet_untrained_zone_vic_get(request, resource=100):
    """
    Return tweets that have not yet been assigned a Victoria zone.

    :param request: Django HttpRequest (only used for routing).
    :param resource: maximum number of rows to fetch from the Couch view.
    :return: JSON response whose ``data`` maps tweet id ->
             ``{'geo': [...], 'zone': None}``.
    """
    start_timer = time.time()
    try:
        current_db = tweet_couch_db.get_current_database()
        tweets = current_db.view('unlearning/vic_zone', limit=resource)
    except Exception as e:
        influxdb_handler.make_point(key='api/tweet/untrained/zone/vic/',
                                    method='GET', error=400, prefix='API')
        logger.error('Query Untrained Tweet Fail! %s', e)
        resp = init_http_not_found('Query Untrained Tweet Fail!')
        # BUG FIX: the 400 response was built but never returned, so the
        # code fell through and raised NameError on the undefined `tweets`.
        return make_json_response(HttpResponseBadRequest, resp)
    resp = init_http_success()
    for tweet in tweets:
        resp['data'].update({
            tweet.id: dict(
                geo=tweet.value.get('geo', []),
                zone=None
            )
        })
    timer = (time.time() - start_timer)
    influxdb_handler.make_point(key='api/tweet/untrained/zone/vic/', method='GET',
                                error='success', prefix='API',
                                tweet=len(resp['data']), timer=timer)
    return make_json_response(HttpResponse, resp)
def tweet_untrained_get(request, resource=100):
    """
    Return tweets still awaiting Machine Learning classification.

    :param request: Django HttpRequest (only used for routing).
    :param resource: maximum number of rows to fetch from the Couch view.
    :return: JSON response whose ``data`` maps tweet id ->
             ``{'img_id': ..., 'tags': {...}, 'model': {...}}``.
    """
    start_timer = time.time()
    try:
        current_db = tweet_couch_db.get_current_database()
        tweets = current_db.view('unlearning/machine', limit=resource)
    except Exception as e:
        influxdb_handler.make_point(key='api/tweet/untrained/', method='GET',
                                    error=400, prefix='API')
        logger.error('Query Untrained Tweet Fail! %s', e)
        resp = init_http_not_found('Query Untrained Tweet Fail!')
        # BUG FIX: the 400 response was built but never returned, so the
        # code fell through and raised NameError on the undefined `tweets`.
        return make_json_response(HttpResponseBadRequest, resp)
    resp = init_http_success()
    for tweet in tweets:
        resp['data'].update({
            tweet.id: dict(
                img_id=tweet.value.get('img_id'),
                tags=tweet.value.get('tags', {}),
                model=tweet.value.get('model', {})
            )
        })
    timer = (time.time() - start_timer)
    influxdb_handler.make_point(key='api/tweet/untrained/', method='GET',
                                error='success', prefix='API',
                                tweet=len(resp['data']), timer=timer)
    return make_json_response(HttpResponse, resp)
def tweet_post(request):
    """
    Receive a tweet from the crawler and store it in CouchDB.

    Expects a JSON body with keys: id, text, img_id, geo, date, user,
    hashtags. ``geo``, ``hashtags`` and ``img_id`` must be lists; ``date``
    must parse as a datetime.

    :return: 200 with ``{'id': ..., 'rev': ...}`` on success, 400 on bad
             input or when the tweet already exists.
    """
    try:
        keys = ['id', 'text', 'img_id', 'geo', 'date', 'user', 'hashtags']
        tweet = make_dict(keys, ujson.loads(request.body))
    except Exception as e:
        influxdb_handler.make_point(key='api/tweet/', method='POST', error=400,
                                    prefix='API', msg='error attribute')
        logger.debug('Insufficient Attributes [%s] %s' % (request.path, e))
        resp = init_http_not_found('Insufficient Attributes')
        return make_json_response(HttpResponseBadRequest, resp)
    # BUG FIX: a misplaced parenthesis made the second test
    # `isinstance(tweet['hashtags'], list or ...)` — `list or X` is always
    # `list`, so the img_id check never ran. Validate all three properly.
    if (not isinstance(tweet['geo'], list)
            or not isinstance(tweet['hashtags'], list)
            or not isinstance(tweet['img_id'], list)):
        influxdb_handler.make_point(key='api/tweet/', method='POST', error=400,
                                    prefix='API', msg='error geo/img_id')
        resp = init_http_not_found('geo, hashtags, img_id must be LIST!')
        return make_json_response(HttpResponseBadRequest, resp)
    try:
        # Normalise the tweet timestamp to UTC.
        utc_tweet_time = parse_datetime(tweet['date']).astimezone(timezone.utc)
    except Exception as e:
        influxdb_handler.make_point(key='api/tweet/', method='POST', error=400,
                                    prefix='API', msg='error format time')
        logger.debug('Error Datetime Format [%s], %s' % (tweet['date'], e))
        resp = init_http_not_found('Error Datetime Format, follow \'%Y-%m-%d %H:%M:%S%z\'')
        return make_json_response(HttpResponseBadRequest, resp)
    # Crawler sends [''] when there is no geo information.
    if tweet['geo'] == ['']:
        tweet['geo'] = []
    tweet.update(dict(
        _id=tweet['id'],
        date=utc_tweet_time.strftime('%Y-%m-%d %H:%M:%S%z'),
        process=0,
        process_text=0,
        model={},
        tags={},
        last_update=timezone.now().astimezone(timezone.utc).strftime('%Y-%m-%d %H:%M:%S%z'),
        text_update='',
        ml_update=''
    ))
    tweet.pop('id')
    try:
        tweet_id, rev = tweet_couch_db.save(tweet)
    except Exception as e:
        # Couch rejects duplicate _id values — treat as "already crawled".
        influxdb_handler.make_point(key='api/tweet/', method='POST', error=400,
                                    prefix='API', msg='existed')
        resp = init_http_not_found('Tweet Existed')
        return make_json_response(HttpResponseBadRequest, resp)
    resp = init_http_success()
    resp['data'].update(dict(
        id=tweet_id,
        rev=rev
    ))
    influxdb_handler.make_point(key='api/tweet/', method='POST',
                                error='success', prefix='API')
    return make_json_response(HttpResponse, resp)
def tweet_trained_post(request):
    """
    Receive Machine Learning results from a client and merge them into the
    corresponding CouchDB tweets.

    Body: JSON mapping tweet id -> {'tags': {...}, 'model': {...}}.
    On the first failure, aborts with 400 and reports which tweets were
    already updated.
    """
    start_timer = time.time()
    results = ujson.loads(request.body)
    updated = dict()
    for tweet_id in results:
        try:
            _tweet = tweet_couch_db.get(id=tweet_id)
            # Copy everything except the Couch bookkeeping fields, then
            # re-attach _id/_rev explicitly so the save targets this revision.
            tweet = {k: v for k, v in _tweet.items() if k not in ('_id', '_rev')}
            tweet['_id'] = _tweet.id
            tweet['_rev'] = _tweet.rev
            tweet['tags'].update(results[tweet_id]['tags'])
            tweet['model'] = results[tweet_id]['model']
            _now = timezone.now().astimezone(timezone.utc).strftime('%Y-%m-%d %H:%M:%S%z')
            tweet['ml_update'] = _now
            tweet['last_update'] = _now
            tweet['process'] = tweet['process'] + 1
            tweet_couch_db.save(tweet)
            updated[tweet['_id']] = tweet['ml_update']
        except Exception as e:
            # Record the failure plus how far we got before it happened.
            influxdb_handler.make_point(key='api/tweet/trained/', method='POST',
                                        error=400, prefix='API')
            influxdb_handler.make_point(key='api/tweet/trained/', method='POST',
                                        error='success', prefix='API',
                                        tweet=len(updated))
            logger.debug('Tweet post failed %s' % e)
            resp = init_http_bad_request('Tweet Attribute Required %s' % e)
            resp['data'] = updated
            return make_json_response(HttpResponseBadRequest, resp)
    resp = init_http_success()
    resp['data'] = updated
    timer = (time.time() - start_timer)
    influxdb_handler.make_point(key='api/tweet/trained/', method='POST',
                                error='success', prefix='API',
                                tweet=len(updated), timer=timer)
    return make_json_response(HttpResponse, resp)
def tweet_pic_post(request):
    """
    Accept an uploaded picture, store it in object storage under a fresh
    UUID-derived name, and return that id to the caller.
    """
    try:
        file = request.FILES.get('file', None)
    except Exception as e:
        file = None
        logger.debug('No Attached File %s', e)
    if not file:
        influxdb_handler.make_point(key='api/tweet/pic/', prefix='API',
                                    method='POST', error=400,
                                    msg='no attach pic')
        resp = init_http_not_found('No Attach File')
        return make_json_response(HttpResponseBadRequest, resp)
    # Generate a storage key: UUID with the dashes removed.
    pic_id = str(uuid()).replace('-', '')
    # Storage calls get one reconnect-and-retry each on failure.
    try:
        object_storage_handler.upload(pic_id + '.jpg', file)
    except Exception as e:
        tweet_pic_reconnect(e)
        object_storage_handler.upload(pic_id + '.jpg', file)
    try:
        pic = object_storage_handler.download(pic_id + '.jpg')
    except Exception as e:
        tweet_pic_reconnect(e)
        pic = object_storage_handler.download(pic_id + '.jpg')
    # Read the object back to confirm the upload actually landed.
    if not pic:
        influxdb_handler.make_point(key='api/tweet/pic/', prefix='API',
                                    method='POST', error=400)
        resp = init_http_not_found('Pic Upload Fail')
        return make_json_response(HttpResponseBadRequest, resp)
    resp = init_http_success()
    resp['data'].update(dict(pic_id=pic_id))
    influxdb_handler.make_point(key='api/tweet/pic/', prefix='API',
                                method='POST', error='success')
    return make_json_response(HttpResponse, resp)
def tweet_pic_list(request):
    """
    List the ids of all pictures held in object storage.

    :return: JSON response with ``data.pic_ids`` — one id per stored file.
    """
    def process(s):
        # BUG FIX: the original used s['name'].strip('.jpg'), which strips
        # any of the characters '.', 'j', 'p', 'g' from BOTH ends (e.g.
        # 'gag1.jpg' -> 'a1'), not the '.jpg' suffix. Cut the suffix off
        # explicitly instead.
        if 'name' in s:
            name = s['name']
            return name[:-len('.jpg')] if name.endswith('.jpg') else name
    try:
        files = object_storage_handler.findall()
    except Exception as e:
        tweet_pic_reconnect(e)
        files = object_storage_handler.findall()
    # BUG FIX: materialise as a list — the original passed a lazy `map`
    # object straight into the JSON response, which is not serialisable.
    pic_ids = [process(f) for f in files]
    resp = init_http_success()
    resp['data'].update(dict(pic_ids=pic_ids))
    influxdb_handler.make_point(key='api/tweet/pic/', prefix='API',
                                method='GET', error='success')
    return make_json_response(HttpResponse, resp)
def tweet_trained_zone_vic_post(request):
    """
    Receive client results that assign a Victoria zone to tweets and update
    the corresponding CouchDB documents.

    Body: JSON mapping tweet id -> {'zone': ...}. Aborts with 400 on the
    first failure, reporting the tweets updated so far.
    """
    start_timer = time.time()
    results = ujson.loads(request.body)
    updated = dict()
    for tweet_id in results:
        try:
            _tweet = tweet_couch_db.get(id=tweet_id)
            # Full copy including _id/_rev so the save replaces this revision.
            tweet = dict(_tweet.items())
            _now = timezone.now().astimezone(timezone.utc).strftime('%Y-%m-%d %H:%M:%S%z')
            tweet['last_update'] = _now
            tweet['vic_zone'] = results[tweet_id].get('zone', None)
            tweet_couch_db.save(tweet)
            updated[tweet['_id']] = tweet['last_update']
        except Exception as e:
            # Record the failure plus how far we got before it happened.
            influxdb_handler.make_point(key='api/tweet/trained/zone/vic/',
                                        method='POST', error=400, prefix='API')
            influxdb_handler.make_point(key='api/tweet/trained/zone/vic/',
                                        method='POST', error='success',
                                        prefix='API', tweet=len(updated))
            logger.debug('Tweet post failed %s' % e)
            resp = init_http_bad_request('Tweet Attribute Required %s' % e)
            resp['data'] = updated
            return make_json_response(HttpResponseBadRequest, resp)
    resp = init_http_success()
    resp['data'] = updated
    timer = (time.time() - start_timer)
    influxdb_handler.make_point(key='api/tweet/trained/zone/vic/', method='POST',
                                error='success', prefix='API',
                                tweet=len(updated), timer=timer)
    return make_json_response(HttpResponse, resp)
def statistics_track_get(request, user_id=None, number=100):
    """
    Track one user (``user_id`` given) or a batch of random users.

    Optional JSON body parameters: start_time, end_time, tags, skip, single.
    Results are cached per-day in object storage; the cache is consulted
    first and rebuilt from CouchDB on a miss.

    :param user_id: track exactly this user when given, else random users.
    :param number: how many users to return (forced to 1 when user_id set).
    :return: JSON response mapping user -> list of tweet summaries.
    """
    def process_tag(tags):
        # Map raw model tags onto the UI categories.
        result_tags = {}
        for tag in tags:
            if tag in ['hentai', 'p**n']:
                result_tags.update({'lust': [tag]})
            elif tag in ['neutral', 'positive', 'negative']:
                result_tags.update({'sentiment': [tag]})
            elif 'text' in tag:
                # BUG FIX: lstrip('text.') strips any of the characters
                # 't','e','x','.' from the left, mangling tags such as
                # 'text.excited' -> 'cited'. Remove the literal prefix.
                clean = tag[len('text.'):] if tag.startswith('text.') else tag
                result_tags.update({'text': [clean]})
            else:
                result_tags.update({'gluttony': [tag]})
        return result_tags

    def make_this_point(_length, _timer):
        # Report to InfluxDB under the route that was actually hit.
        if user_id:
            influxdb_handler.make_point(key='api/statistics/track/:user_id/',
                                        method='GET', error='success',
                                        prefix='API', user=_length, timer=_timer)
        else:
            influxdb_handler.make_point(key='api/statistics/track/random/',
                                        method='GET', error='success',
                                        prefix='API', user=_length, timer=_timer)

    def trim_results(results):
        # Shared post-processing for the cached and the fresh path:
        # window users, filter tweets by time range / requested tags,
        # cap per-user tweets at `single`, and sort.
        results = dict(tuple(results.items())[skip:skip + number])
        for user in results:
            new_tweet = []
            for tweet in results[user]:
                result_tag = {}
                # Time filtering only applies when tracking a single user.
                if user_id and (
                        (start_time and parse_datetime(tweet['time']) < parse_datetime(start_time)) or
                        (end_time and parse_datetime(tweet['time']) > parse_datetime(end_time))):
                    continue
                for tag in tweet['tags']:
                    if tag in target_tag or tweet['tags'][tag] in target_tag:
                        result_tag.update({tag: tweet['tags'][tag]})
                tweet['tags'] = result_tag
                if result_tag or not target_tag:
                    new_tweet.append(tweet)
            results[user] = new_tweet
            results[user] = results[user][0:single]
            results[user].sort(key=lambda x: x.get('time'))
        # Users with the most tweets first.
        return dict(sorted(results.items(), key=lambda item: len(item[1]),
                           reverse=True))

    start_timer = time.time()
    params = ujson.loads(request.body) if request.body else {}
    start_time = params.get('start_time', None)
    end_time = params.get('end_time', None)
    target_tag = params.get('tags', [])
    skip = params.get('skip', 0)
    single = int(params.get('single', 50))
    try:
        start_time = str_to_str_datetime_utc(start_time) if start_time else None
        end_time = str_to_str_datetime_utc(end_time) if end_time else None
    except Exception as e:
        resp = init_http_not_found('Data time format error')
        return make_json_response(HttpResponseBadRequest, resp)
    number = 1 if user_id else number
    today = timezone.now().strftime('%Y-%m-%d')
    # Cache key encodes user, time window and day.
    json_name = 'track\\{}\\{}\\{}\\{}.json'
    json_name = json_name.format(
        user_id,
        None if not start_time else start_time.replace(' ', '-'),
        None if not end_time else end_time.replace(' ', '-'),
        today)
    try:
        # Serve from the per-day cache when present.
        result_file = json_storage_handler.download(json_name)
        results = ujson.load(result_file)
        results = trim_results(results)
        timer = (time.time() - start_timer)
        make_this_point(len(results), timer)
        resp = init_http_success()
        resp['data'].update(results)
        return make_json_response(HttpResponse, resp)
    except Exception as e:
        pass  # cache miss — fall through and run the full query

    # Avoid possible query timeout: retry until the view answers.
    while True:
        try:
            current_db = tweet_couch_db.get_current_database()
            if not user_id:
                if start_time and end_time:
                    tweets = current_db.view('statistics/time_geo_all_tags',
                                             startkey=start_time, endkey=end_time,
                                             stale='ok', limit=100000)
                elif start_time:
                    tweets = current_db.view('statistics/time_geo_all_tags',
                                             startkey=start_time, stale='ok',
                                             limit=100000)
                elif end_time:
                    tweets = current_db.view('statistics/time_geo_all_tags',
                                             endkey=end_time, stale='ok',
                                             limit=100000)
                else:
                    tweets = current_db.view('statistics/time_geo_all_tags',
                                             stale='ok', limit=100000)
            else:
                tweets = current_db.view('statistics/user_geo', key=user_id,
                                         stale='ok', limit=single)
            tweets = [tweet.value for tweet in tweets]
            break
        except Exception as e:
            logger.debug('Query Timeout %s' % e)
            influxdb_handler.make_point(key='api/statistics/track/:user_id/',
                                        method='GET', error=500, prefix='API')
            continue

    results = {}
    geo_exists = {}
    for tweet in tweets:
        user = tweet.get('user')
        if user not in results:
            results[user] = []
        if user not in geo_exists:
            geo_exists[user] = []
        # For random users keep at most 150 tweets with distinct geo points;
        # for a tracked user keep everything.
        if user_id or (tweet.get('geo') not in geo_exists[user]
                       and len(results[user]) < 150):
            geo_exists[user].append(tweet.get('geo'))
            results[user].append(dict(
                time=parse_datetime(tweet.get('date')).astimezone(
                    timezone.get_current_timezone()).strftime('%Y-%m-%d %H:%M:%S%z'),
                geo=tweet.get('geo'),
                img_id=tweet.get('img_id'),
                tags=process_tag(tweet.get('tags'))))
            if user_id:
                results[user][-1].update(dict(text=tweet.get('text')))
    results = dict(sorted(results.items(), key=lambda item: len(item[1]),
                          reverse=True))
    for user in results:
        results[user].sort(key=lambda x: x.get('time'))

    # Upload the full query result to Nectar Object Storage as cache.
    json_file = ujson.dumps(results)
    try:
        json_storage_handler.upload(json_name, json_file)
    except Exception as e:
        json_storage_handler.reconnect()
        json_storage_handler.upload(json_name, json_file)

    results = trim_results(results)
    timer = (time.time() - start_timer)
    make_this_point(len(results), timer)
    resp = init_http_success()
    resp['data'].update(results)
    return make_json_response(HttpResponse, resp)
def statistics_text_get(request):
    """
    Return the statistics results for Natural Language Processing.

    Splits the ``statistics/text_result`` view output into sentiment
    counts and free-text tag counts, each shaped as
    ``{'key': [...], 'value': [...]}``. Cached per-day in object storage.
    """
    start_timer = time.time()
    today = timezone.now().strftime('%Y-%m-%d')
    json_name = 'text\\{}.json'.format(today)
    try:
        # Serve from the per-day cache when present.
        result_file = json_storage_handler.download(json_name)
        results = ujson.load(result_file)
        timer = (time.time() - start_timer)
        influxdb_handler.make_point(key='api/statistics/text/', method='GET',
                                    error='success', prefix='API', timer=timer)
        resp = init_http_success()
        resp['data'] = results
        return make_json_response(HttpResponse, resp)
    except Exception:
        pass
    # Avoid possible query timeout: retry until the view answers.
    while True:
        try:
            current_db = tweet_couch_db.get_current_database()
            results = current_db.view('statistics/text_result', group=True,
                                      stale='ok')
            results = dict((result.key, result.value) for result in results)
            break
        except Exception as e:
            logger.debug('Query Timeout %s' % e)
            influxdb_handler.make_point(key='api/statistics/text/', method='GET',
                                        error=500, prefix='API')
            continue
    text = dict()
    sentiment = dict()
    for result in results:
        if result in ['neutral', 'positive', 'negative']:
            sentiment.update({result: results[result]})
        else:
            text.update({result: results[result]})
    sentiment = dict(sorted(sentiment.items(), key=lambda item: item[1],
                            reverse=True))
    text = dict(sorted(text.items(), key=lambda item: item[1], reverse=True))
    # BUG FIX: materialise the dict views as lists — dict_keys/dict_values
    # objects are not JSON-serialisable, so both the response and the
    # uploaded cache would break.
    sentiment = dict(key=list(sentiment.keys()), value=list(sentiment.values()))
    text = dict(key=list(text.keys()), value=list(text.values()))
    results = dict(text=text, sentiment=sentiment)
    # Upload the results to Nectar Object Storage as cache.
    json_file = ujson.dumps(results)
    try:
        json_storage_handler.upload(json_name, json_file)
    except Exception as e:
        json_storage_handler.reconnect()
        json_storage_handler.upload(json_name, json_file)
    timer = (time.time() - start_timer)
    influxdb_handler.make_point(key='api/statistics/text/', method='GET',
                                error='success', prefix='API', timer=timer)
    resp = init_http_success()
    resp['data'] = results
    return make_json_response(HttpResponse, resp)
def statistics_machine_get(request):
    """
    Return the statistics results of Machine Learning classification.

    Splits the ``statistics/machine_result`` view output into ``lust``
    (NSFW categories) and ``gluttony`` (food categories, small counts
    folded into 'others'), each shaped as ``{'key': [...], 'value': [...]}``.
    Cached per-day in object storage.
    """
    start_timer = time.time()
    today = timezone.now().strftime('%Y-%m-%d')
    json_name = 'machine\\{}.json'.format(today)
    try:
        # Serve from the per-day cache when present.
        result_file = json_storage_handler.download(json_name)
        results = ujson.load(result_file)
        timer = (time.time() - start_timer)
        influxdb_handler.make_point(key='api/statistics/machine/', method='GET',
                                    error='success', prefix='API', timer=timer)
        resp = init_http_success()
        resp['data'] = results
        return make_json_response(HttpResponse, resp)
    except Exception:
        pass
    # Avoid possible query timeout: retry until the view answers.
    while True:
        try:
            current_db = tweet_couch_db.get_current_database()
            results = current_db.view('statistics/machine_result', group=True,
                                      stale='ok')
            results = dict((result.key, result.value) for result in results)
            break
        except Exception as e:
            logger.debug('Query Timeout %s' % e)
            influxdb_handler.make_point(key='api/statistics/machine/',
                                        method='GET', error=500, prefix='API')
            continue
    lust = dict()
    gluttony = dict(others=0)
    for result in results:
        if result in ['neutral', 'sexy', 'p**n', 'hentai', 'drawings']:
            lust.update({result: results[result]})
        elif results[result] < 10:
            # Fold rare food categories into a single bucket.
            gluttony['others'] += results[result]
        else:
            gluttony.update({result: results[result]})
    lust = dict(sorted(lust.items(), key=lambda item: item[1], reverse=True))
    gluttony = dict(sorted(gluttony.items(), key=lambda item: item[1],
                           reverse=True))
    # BUG FIX: materialise the dict views as lists — dict_keys/dict_values
    # objects are not JSON-serialisable, so both the response and the
    # uploaded cache would break.
    lust = dict(key=list(lust.keys()), value=list(lust.values()))
    gluttony = dict(key=list(gluttony.keys()), value=list(gluttony.values()))
    results = dict(lust=lust, gluttony=gluttony)
    # Upload the results to Nectar Object Storage as cache.
    json_file = ujson.dumps(results)
    try:
        json_storage_handler.upload(json_name, json_file)
    except Exception as e:
        json_storage_handler.reconnect()
        json_storage_handler.upload(json_name, json_file)
    timer = (time.time() - start_timer)
    influxdb_handler.make_point(key='api/statistics/machine/', method='GET',
                                error='success', prefix='API', timer=timer)
    resp = init_http_success()
    resp['data'] = results
    return make_json_response(HttpResponse, resp)
def statistics_zone_vic_get(request, zone=None):
    """
    Return statistics aggregated by Victoria State local government zone.

    Merges per-zone tag counts from the ``statistics/vic_zone_tags`` view
    into the Victoria GeoJSON feature properties, uploads the result to
    object storage, and returns a URL to that file. Cached per-day.

    :param zone: unused; kept for URL-routing compatibility.
    """
    start_timer = time.time()
    today = timezone.now().strftime('%Y-%m-%d')
    json_name = 'zone-vic-{}.json'.format(today)
    # BUG FIX: use a context manager — the bare open() leaked the file
    # handle on every request.
    with open(BASE_DIR + '/backend/common/vic_geo.json') as vic_geo_file:
        vic_json = ujson.load(vic_geo_file)
    try:
        # Serve the cached file's URL when today's result already exists.
        if json_storage_handler.find(json_name):
            timer = (time.time() - start_timer)
            influxdb_handler.make_point(key='api/statistics/zone/vic/',
                                        method='GET', error='success',
                                        prefix='API', timer=timer)
            resp = init_http_success()
            resp['data'].update(
                dict(url='http://172.26.38.1:8080/api/statistics/file/%s/' % json_name))
            return make_json_response(HttpResponse, resp)
    except Exception:
        pass
    current_db = tweet_couch_db.get_current_database()
    tweets = current_db.view('statistics/vic_zone_tags', stale='ok',
                             group=True, group_level=3)
    # results[zone][category][tag] = count
    results = dict()
    for tweet in tweets:
        if tweet.key[0] not in results:
            results.update({tweet.key[0]: {}})
        # Rename model buckets to the UI category names.
        if tweet.key[1] == 'food179':
            tweet.key[1] = 'gluttony'
        if tweet.key[1] == 'nsfw':
            tweet.key[1] = 'lust'
        if tweet.key[1] not in results[tweet.key[0]]:
            results[tweet.key[0]].update({tweet.key[1]: {}})
        if 'sentiment' not in results[tweet.key[0]]:
            results[tweet.key[0]].update(dict(sentiment={}))
        # Skip already-seen tags and the uninformative categories.
        if tweet.key[2] not in results[tweet.key[0]][tweet.key[1]] \
                and tweet.key[2] not in ['non_food', 'neutral']:
            if '.' in tweet.key[2]:
                # Dotted tags ('sentiment.xxx' / 'text.xxx') are stored
                # under their suffix.
                if 'sentiment' in tweet.key[2]:
                    results[tweet.key[0]]['sentiment'].update(
                        {tweet.key[2].split('.')[1]: tweet.value})
                else:
                    results[tweet.key[0]][tweet.key[1]].update(
                        {tweet.key[2].split('.')[1]: tweet.value})
                continue
            results[tweet.key[0]][tweet.key[1]].update(
                {tweet.key[2]: tweet.value})
    for result in results:
        total = 0
        for vic_zone in vic_json['features']:
            if vic_zone['properties']['vic_lga__3'] == result:
                if 'gluttony' in results[result]:
                    for item in results[result]['gluttony']:
                        total += results[result]['gluttony'][item]
                if 'lust' in results[result]:
                    for item in results[result]['lust']:
                        total += results[result]['lust'][item]
                if 'text' in results[result]:
                    for item in results[result]['text']:
                        total += results[result]['text'][item]
                vic_zone['properties'].update(dict(name=result))
                # NOTE(review): 'statistcs' is misspelled but is a runtime
                # JSON key the frontend may rely on — left unchanged.
                vic_zone['properties'].update(
                    dict(statistcs=results[result], total=total))
    # Upload the results to Nectar Object Storage as cache.
    json_file = ujson.dumps(vic_json)
    try:
        json_storage_handler.upload(json_name, json_file)
    except Exception as e:
        json_storage_handler.reconnect()
        json_storage_handler.upload(json_name, json_file)
    timer = (time.time() - start_timer)
    influxdb_handler.make_point(key='api/statistics/zone/vic/', method='GET',
                                error='success', prefix='API', timer=timer)
    resp = init_http_success()
    resp['data'].update(
        dict(url='http://172.26.38.1:8080/api/statistics/file/%s/' % json_name))
    return make_json_response(HttpResponse, resp)