def set_using_json(self, post_json):
    """Fill in this object's attributes from a single hit mapping.

    ``post_json`` must provide a top-level ``"doc_id"`` and a ``"fields"``
    mapping. Every value read from ``"fields"`` is indexed with ``[0]``,
    i.e. each field is expected to be a sequence whose first element is
    the value of interest.

    Fallbacks applied when an optional field is missing or empty:

    * ``created``          -> ``@timestamp``
    * ``source``           -> ``type``
    * ``category_id``      -> ``0``
    * ``user_profile_url`` -> ``User().get_profile_url(...)``
    * ``content_img_url``  -> a URL extracted from the post text, when no
      URL can be extracted from the ``content_img_url`` field itself

    Raises ``KeyError`` when a mandatory field is absent.
    """
    self.doc_id = post_json["doc_id"]
    fields = post_json["fields"]

    def first(key):
        # Field values are sequences; only element 0 is ever used.
        return fields[key][0]

    self.post_id = first("post_id")
    self.text = first("text")
    self.created = first("created") if fields.get("created") else first("@timestamp")

    # Prefer a URL found in the image field; otherwise look for one
    # embedded in the (utf-8 encoded) post text.
    url_finder = Url()
    self.content_img_url = (
        url_finder.get_url_from_string(first("content_img_url"))
        or url_finder.get_url_from_string(first("text").encode("utf-8"))
    )

    self.user_img_url = first("user_img_url")
    self.source = first("source") if fields.get("source") else first("type")

    # Plain one-to-one copies, in the same order the fields are consumed.
    for attr in ("up_votes", "user_id", "username", "place_name", "distance"):
        setattr(self, attr, first(attr))

    self.category_id = first("category_id") if fields.get("category_id") else 0

    if not fields.get("user_profile_url"):
        # No stored profile link: derive one from the identity fields
        # that were populated above.
        self.user_profile_url = User().get_profile_url(
            userid=self.user_id, source=self.source, username=self.username)
    else:
        self.user_profile_url = first("user_profile_url")

    self.coord = first("coord")
def get_feed_around_coord(self, from_datetime, coord, q_from, q_size, encoded_tags, radius, sort, filterdays):
    """Query the feed around ``coord`` and convert the hits into ``Post`` objects.

    All arguments are forwarded unchanged to ``Feed.get_feed_around_coord``,
    whose return value is parsed as JSON. The parsed result is expected to
    contain ``result["hits"]["total"]`` and ``result["hits"]["hits"]``; each
    hit supplies ``"_id"`` and a ``"fields"`` mapping whose values are
    indexed with ``[0]``.

    A hit that cannot be converted (missing field, bad value, ...) is logged
    and skipped, so one malformed document does not discard the whole page.

    Returns:
        list: the ``Post`` objects built from the convertible hits, in
        result order; an empty list when there are no hits.
    """
    f = Feed()
    data = []
    result = json.loads(f.get_feed_around_coord(
        from_datetime, coord, q_from, q_size, encoded_tags, radius, sort, filterdays))

    if result["hits"]["total"] > 0:
        for p in result["hits"]["hits"]:
            field = p["fields"]
            try:
                url_util = Url()
                media_url = url_util.get_url_from_string(field.get("content_img_url")[0])
                if not media_url:
                    # see if text field has a url in it
                    media_url = url_util.get_url_from_string(field.get("text")[0].encode("utf-8"))
                data.append(Post(
                    p["_id"],
                    field.get("post_id")[0],
                    field.get("text")[0].encode("utf-8"),
                    Date().get_obj(field.get("@timestamp")[0]),
                    media_url,
                    field.get("user_img_url")[0],
                    field.get("type")[0],
                    field.get("user_id")[0],
                    field.get("place_name")[0],
                    field.get("coord")[0],
                    field.get("username")[0],
                    field.get("up_votes")[0],
                    0,
                    field.get("distance")[0]))
            except Exception as e:
                # Deliberate best-effort: the fetcher engine and logstash
                # must ensure clean data gets into elasticsearch which
                # conforms to the Post object; anything that does not is
                # logged (exception, then the offending hit) and skipped.
                logging.exception(e)
                logging.exception(p)

    # Previously the collected posts were built but never handed back,
    # so callers always received None.
    return data
def test_find_url_in_string(self):
    """Url.get_url_from_string should return the URL embedded in free text."""
    expected = "http://t.co/KOKjMXlJab"
    # Tweet-like sample: the link sits between a quoted retweet and a
    # trailing remark.
    sample = (
        '"s@russian_market: Meanwhile in London!!!! ( h/t @QuantumSquawk) '
        'http://t.co/KOKjMXlJab" haha is he phoning while pole sitting?'
    )
    found = Url().get_url_from_string(sample)
    self.assertEqual(found, expected)