Exemple #1
0
    def set_using_json(self, post_json):
        """Populate this object's attributes from a search-hit style dict.

        ``post_json`` must provide a top-level ``"doc_id"`` and a ``"fields"``
        mapping. Every value read from ``"fields"`` is indexed with ``[0]``,
        so each field is expected to be a non-empty sequence.

        Fallbacks applied when a field is missing or falsy:
          * ``created``          -> ``@timestamp``
          * ``source``           -> ``type``
          * ``category_id``      -> ``0``
          * ``user_profile_url`` -> ``User().get_profile_url(...)``
          * ``content_img_url``  -> a URL found in the text field, when the
            image field itself yields no URL.
        """
        self.doc_id = post_json["doc_id"]
        fields = post_json["fields"]
        self.post_id = fields["post_id"][0]
        self.text = fields["text"][0]

        # Prefer the explicit creation time; otherwise use the index timestamp.
        created_key = "created" if fields.get('created') else "@timestamp"
        self.created = fields[created_key][0]

        # Use the image field's URL, or fall back to one embedded in the text.
        url_finder = Url()
        self.content_img_url = (
            url_finder.get_url_from_string(fields["content_img_url"][0])
            or url_finder.get_url_from_string(fields["text"][0].encode("utf-8"))
        )
        self.user_img_url = fields["user_img_url"][0]

        source_key = "source" if fields.get('source') else "type"
        self.source = fields[source_key][0]

        # Plain one-to-one copies of the first value of each field.
        for attr_name in ("up_votes", "user_id", "username", "place_name", "distance"):
            setattr(self, attr_name, fields[attr_name][0])

        self.category_id = fields["category_id"][0] if fields.get("category_id") else 0

        if fields.get("user_profile_url"):
            self.user_profile_url = fields["user_profile_url"][0]
        else:
            # No stored profile URL: derive one from the user's identity.
            self.user_profile_url = User().get_profile_url(
                userid=self.user_id, source=self.source, username=self.username)

        self.coord = fields["coord"][0]
Exemple #2
0
 def get_feed_around_coord(self, from_datetime, coord, q_from, q_size, encoded_tags, radius, sort, filterdays):
     """Query the feed around a coordinate and convert each hit into a Post.

     All arguments are forwarded unchanged to ``Feed.get_feed_around_coord``;
     its return value is parsed as JSON and is expected to contain
     ``["hits"]["total"]`` and ``["hits"]["hits"]``.

     A hit whose fields cannot be turned into a ``Post`` (missing field,
     empty value, ...) is logged and skipped instead of aborting the request.

     Returns:
         list: the ``Post`` objects built from the well-formed hits; empty
         when the query matched nothing.
     """
     f = Feed()
     data = []
     result = json.loads(f.get_feed_around_coord(from_datetime, coord, q_from, q_size, encoded_tags, radius, sort,
                                                 filterdays))
     if result["hits"]["total"] > 0:
         for p in result["hits"]["hits"]:
             field = p["fields"]
             try:
                 # Encode once; used both for URL detection and for the Post itself.
                 text = field.get("text")[0].encode("utf-8")

                 url_util = Url()
                 media_url = url_util.get_url_from_string(field.get("content_img_url")[0])
                 if not media_url:
                     # see if text field has a url in it
                     media_url = url_util.get_url_from_string(text)

                 # Positional order follows the original call. The literal 0 is
                 # presumably the category id -- confirm against Post's signature.
                 data.append(Post(p["_id"], field.get("post_id")[0], text,
                                  Date().get_obj(field.get("@timestamp")[0]),
                                  media_url, field.get("user_img_url")[0], field.get("type")[0],
                                  field.get("user_id")[0], field.get("place_name")[0],
                                  field.get("coord")[0], field.get("username")[0],
                                  field.get("up_votes")[0], 0, field.get("distance")[0]))
             except Exception as e:
                 # fetcher engine and logstash must ensure clean data gets into
                 # elasticsearch which conforms to the Post object
                 logging.exception(e)
                 logging.exception(p)
     # The collected posts were previously built and then discarded.
     return data
Exemple #3
0
 def test_find_url_in_string(self):
     """Url.get_url_from_string returns the URL embedded in a tweet-like text."""
     expected_url = "http://t.co/KOKjMXlJab"
     tweet_text = """"s@russian_market: Meanwhile in London!!!! ( h/t @QuantumSquawk) http://t.co/KOKjMXlJab" haha is he phoning while pole sitting?"""
     found_url = Url().get_url_from_string(tweet_text)
     self.assertEqual(found_url, expected_url)