Example #1
 def set_graph(self, image_obj, graph_file=NA):
     self.__cur_image_obj = image_obj
     digest = None
     if NA == graph_file:
         graph_file, digest = GraphDirHandler(image_obj.location).get_graph() if image_obj.location else \
                              GraphFetcher(size=image_obj.size, option=image_obj.option).fetch(image_obj.pattern)
     if NA == graph_file:
         return False
     show(graph_file)
     with open(graph_file, 'rb') as f:
         try:
             image = GraphViewer.get_image(f)
         except IOError as e:
             f.close()  # close f now because handle_image() below may delete the file
             # some images cannot be opened (perhaps not an image format?); the error message is 'cannot identify image file'
             info(get_msg(Msg.fail_to_open_image), str(e))
             GraphFetcher().handle_image(graph_file, DELETE)
             return False
         # we hit "Decompressed Data Too Large" for ~/Inside Out/Image_124.jpg...
         except ValueError as e:
             info(get_msg(Msg.fail_to_open_image), str(e))
             return False
     self.__cur_graph_file = graph_file
     self.__graph_history.append([self.__cur_image_obj, self.__cur_graph_file])
     if digest:
         digest_str = digest + "\n"
     else:
         digest_str = "%s:%s\n" % (get_msg(Msg.path), graph_file)
     self.__cur_digest = digest_str + "%s:%sx%s" % (get_msg(Msg.size), image.size[0], image.size[1])
     self.select_phrase(image_obj.pattern)
     return self.set_graph_content(graph_file, image)
Example #2
 def handle_image(location, graph_file, action):
     assert action in [INC_RANK, DEC_RANK]
     handler = GraphDirHandler(location)
     assert handler.__valid
     base_file = graph_file.replace(location + get_delim(), "")
     for image in handler.__status_cache:
         if image == base_file:
             has_change = True
             status = handler.__status_cache[image]
             if INC_RANK == action:
                 status.rank += 1
                 msg = get_msg(Msg.change_rank_to) + str(status.rank)
             else:
                 if 1 == status.rank:
                     msg = get_msg(Msg.cannot_lower_down_rank_as_it_is_already_the_lowest)
                     has_change = False
                 else:
                     status.rank -= 1
                     msg = get_msg(Msg.change_rank_to) + str(status.rank)
             if has_change:
                 handler.__status_cache[image] = status
                 cache_file = location + get_delim() + GraphDirHandler.CACHE_FILE
                 # TODO: the timestamp does not seem to change...
                 timestamp = time.ctime(os.path.getmtime(location))
                 save(cache_file, [timestamp, handler.__status_cache])
             return msg
     assert False
Example #3
 def handle_image(graph_file, action):
     assert action in [DELETE, DISCARD, INC_RANK, DEC_RANK]
     key_str = get_delim() + "image_"
     end_pos = graph_file.find(key_str)
     assert -1 != end_pos
     begin_pos = graph_file[:end_pos].rfind(get_delim())
     assert -1 != begin_pos
     pattern = graph_file[begin_pos + 1:end_pos]
     has_cache, cached_objs = load(GraphFetcher.get_cache_file(pattern))
     assert has_cache
     file_encoding = graph_file[graph_file.find(key_str) + len(key_str):graph_file.find(".jpg")]
     for url in cached_objs:
         image_slot = cached_objs[url]
         if image_slot.encoding == file_encoding:
             new_encoding = NA if DELETE == action else \
                 None if DISCARD == action else \
                 image_slot.encoding  # no change
             new_rank = image_slot.rank + 1 if INC_RANK == action else \
                 image_slot.rank - 1 if DEC_RANK == action and image_slot.rank != 1 else \
                 image_slot.rank  # no change
             updated_slot = ImageSlot(timestamp=image_slot.timestamp,
                                      encoding=new_encoding,
                                      rank=new_rank)
             cached_objs[url] = updated_slot
             save(GraphFetcher.get_cache_file(pattern), cached_objs)
             if action in [DELETE, DISCARD]:
                 os.remove(graph_file)
             msg = "" if action in [DELETE, DISCARD] else \
                 get_msg(Msg.change_rank_to) + str(new_rank) + "!" if new_rank != image_slot.rank else \
                 get_msg(Msg.cannot_lower_down_rank_as_it_is_already_the_lowest) if image_slot.rank == 1 else \
                 None
             assert msg is not None
             return msg
     assert False
Example #4
 def get_graph_digest(self, graph_file):
     if NA == graph_file:
         return "NA"
     full_graph_file = self.__location + get_delim() + graph_file
     timestamp = time.ctime(os.path.getmtime(full_graph_file))
     return "%s:%s\n%s:%s\n%s:%s" % (
         get_msg(Msg.location), full_graph_file,
         get_msg(Msg.timestamp), timestamp,
         get_msg(Msg.rank), self.__status_cache[graph_file].rank)
Example #5
 def get_graph_digest(graph_file, url_obj):
     if NA == graph_file:
         return "NA"
     relative_pos = graph_file.find(pic_home())
     assert 0 == relative_pos
     relative_graph_file = graph_file[len(pic_home()):]
     return "%s:%s\n%s:%s\n%s:%s" % (
         get_msg(Msg.location), relative_graph_file,
         get_msg(Msg.timestamp), url_obj.timestamp.strftime("%B %d, %Y"),
         get_msg(Msg.rank), url_obj.rank)
Example #6
def check_access_status():
    print(get_msg(Msg.check_network_connection))
    try:
        urllib2.urlopen('http://google.com', timeout=3)
        print(get_msg(Msg.network_status_succeed))
        return True
    except urllib2.URLError:
        pass
    print(get_msg(Msg.network_status_fail))
    return False
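
The probe above is Python 2 only (urllib2 was split into urllib.request and urllib.error in Python 3). Below is a minimal standalone sketch of the same reachability check that runs on either version; the URL and 3-second timeout mirror the example, and plain return values stand in for the get_msg prints:

try:
    from urllib2 import urlopen, URLError  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3
    from urllib.error import URLError

def check_access_status():
    # probe a well-known host; success within the timeout means we are online
    try:
        urlopen('http://google.com', timeout=3)
        return True
    except URLError:
        return False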
Example #7
def load(pickle_file):
    """output: is_exist, value"""
    try:
        pickle_fd = open(pickle_file, "rb")  # binary mode: pickle data is not text
    except IOError as err:
        if errno.ENOENT == err.errno:
            show(get_msg(Msg.cache_file_does_not_exist), pickle_file)
            return False, None
        assert False
    try:
        value = cPickle.load(pickle_fd)
        return True, value
    except (ValueError, UnpicklingError, EOFError):
        error(get_msg(Msg.cannot_read_pickle_file), pickle_file, get_msg(Msg.suggest_re_fetch_pickle_file))
        assert False
    finally:
        pickle_fd.close()  # release the descriptor on every path
Example #8
 def set_graph_content(self, graph_file, image=None):
     if image is None:
         try:
             image = GraphViewer.get_image(graph_file)
         except IOError as e:
             error(str(e))
             assert False
     self.__root.geometry(self.__full_geom if self.__fullscreen_mode else
                          '%dx%d+0+0' % (image.size[0], image.size[1]))
     if self.__fullscreen_mode:
         resize_width, resize_height, x_pos, y_pos = self.get_adjusted_geom(image.size[0], image.size[1])
         try:
             resized = image.resize((resize_width, resize_height), Image.ANTIALIAS)
         except IOError as e:
             # an incompletely downloaded image may end up here
             info(get_msg(Msg.fail_to_convert_image_to_fullscreen), str(e))
             GraphFetcher().handle_image(graph_file, DISCARD)
             return False
         image = resized
     self.__root.title(self.__cur_image_obj.group_name)
     tk_image_obj = ImageTk.PhotoImage(image)
     self.__tk_obj_ref = tk_image_obj
     self.__canvas.delete('all')
     self.__canvas.create_image(x_pos if self.__fullscreen_mode else 0, y_pos if self.__fullscreen_mode else 0,
                                image=tk_image_obj, anchor=Tkinter.NW)
     self.show_onscreen_help()
     self.show_onscreen_info()
     self.show_onscreen_phrase()
     return True
Example #9
def save(pickle_file, value):
    pickle_fd = open(pickle_file, "wb")  # binary mode: pickle data is not text
    try:
        cPickle.dump(value, pickle_fd)
    except AttributeError as msg:
        error(get_msg(Msg.fail_to_write_cache), str(msg))
    pickle_fd.close()
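
Examples #7 and #9 form a load/save pair around cPickle; binary mode ('rb'/'wb') matters because text mode can corrupt the pickle stream on some platforms. A minimal standalone round-trip sketch using only the stdlib (the /tmp path is illustrative):

import cPickle

def roundtrip(pickle_file, value):
    with open(pickle_file, 'wb') as fd:  # binary mode, as in save()
        cPickle.dump(value, fd)
    with open(pickle_file, 'rb') as fd:  # binary mode, as in load()
        return cPickle.load(fd)

assert roundtrip('/tmp/demo.pickle', {'rank': 1}) == {'rank': 1}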
Example #10
 def __init__(self, in_file):
     self.__fd = open(in_file)
     try:
         self.__json_data = json.load(self.__fd)
     except Exception as e:
         error(get_msg(Msg.fail_read_file) + "\"", in_file, "\"")
         error(str(e))
         assert False
Example #11
 def delete_image(self, *unused):
     if self.__cur_image_obj.location:
         return  # spec.: removing an image the user 'specified' is not supported
     info(get_msg(Msg.remove_image), self.__cur_graph_file)
     self.__graph_history.remove([self.__cur_image_obj, self.__cur_graph_file])
     GraphFetcher.handle_image(self.__cur_graph_file, DELETE)
     self.cancel_pending_jobs()
     self.timer_action(True)
Example #12
 def decrement_rank(self, *unused):
     info(get_msg(Msg.decrease_rank), self.__cur_graph_file)
     if self.__cur_image_obj.location:
         msg = GraphDirHandler.handle_image(self.__cur_image_obj.location, self.__cur_graph_file, DEC_RANK)
     else:
         msg = GraphFetcher.handle_image(self.__cur_graph_file, DEC_RANK)
     self.__cur_digest += "\n%s" % msg
     self.show_onscreen_info()
Example #13
 def get_recent_result(self, key):
     """output: urls, size_ratio"""
     if key not in self.__url_map:
         return None, None
     [retrieved_date, new_result, urls, size_ratio] = self.__url_map[key]
     if not self.__network_reachable or Crawler.__STOP_SEARCH:
         show(get_msg(Msg.use_previous_search_result))
         # though size_ratio can be valid, we do not return it because the caller is not expected to use it
         return urls, None
     # spec.: we execute a new search once the previous search produced enough new results
     #       => if the previous search yielded n new results out of m total, we search again after m/n days
     #       => if every previous result was new, we search again after 1 day
     #       => if no previous result was new, we search again after 'TARGET_SEARCH_RESULT_SIZE' days
     valid_day_size = len(urls) / new_result if new_result > 0 else \
         1 if NA == new_result else \
         TARGET_SEARCH_RESULT_SIZE  # new_result = 0 => no new result before
     from util.global_def import get_search_latency
     valid_day_size *= get_search_latency()
     current_date = datetime.today()
     date_diff = current_date - retrieved_date
     if date_diff > timedelta(days=valid_day_size):  # 'valid_day_size' is the valid duration of search result
         return None, size_ratio
     to_next_query = timedelta(days=valid_day_size) - date_diff
     hours, remainder = divmod(to_next_query.seconds, 3600)
     minutes, seconds = divmod(remainder, 60)
     show(get_msg(Msg.to_next_search),
          to_next_query.days, get_msg(Msg.day),
          hours, get_msg(Msg.hour),
          minutes, get_msg(Msg.minute),
          seconds, (get_msg(Msg.second) + ","), get_msg(Msg.current_url_count), len(urls))
     # though size_ratio can be valid, we do not return it because the caller is not expected to use it
     return urls, None
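
To make the refresh rule concrete, a small worked sketch of the valid_day_size computation (the numbers are illustrative only):

# 40 cached urls, 8 of which were new on the previous search
urls_count, new_result = 40, 8
valid_day_size = urls_count / new_result  # 40 / 8 = 5 -> search again after 5 days
# if all 40 had been new: 40 / 40 = 1 -> search again after 1 day
# if none had been new (new_result == 0): wait TARGET_SEARCH_RESULT_SIZE days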
Example #14
 def timer_action(self, user_next_image=False):
     if not user_next_image and self.__pause_slideshow:
         self.prepare_for_next_view(get_slideshow_frequency() * 1000)
         return
     success = self.set_graph(self.select_pattern())
     if not success:
         self.prepare_for_next_view(1, get_msg(Msg.try_fetch_image_again))
         return
     self.prepare_for_next_view(get_slideshow_frequency() * 1000)
Example #15
 def __load_or_create_status(self):
     status_cache = {}  # key: image_file, value: status
     cache_file = self.__location + get_delim() + GraphDirHandler.CACHE_FILE
     cache_existed = os.path.exists(cache_file)
     if cache_existed:
         success, cache_data = load(cache_file)
         assert success
         [timestamp, status_cache] = cache_data
         if not self.dir_changed(timestamp):
             return status_cache
         else:
             info(get_msg(Msg.directory), self.__location, get_msg(Msg.has_changed_update_cache_file))
     else:
         info("%s%s" % (get_msg(Msg.create_new_cache_file_for_directory), self.__location))
     image_files = []
     for root, _, files in os.walk(self.__location):
         assert len(root) >= 1
         if root[-1] != get_delim():
             root += get_delim()
         for base_file in files:
             basename, ext = os.path.splitext(base_file)
             if ext.replace(".", "") in GraphDirHandler.RECOGNIZED_IMAGE_EXT:
                 image_files.append((root + base_file).replace(self.__location, ""))
     if not image_files:
         if cache_existed:
             os.remove(cache_file)
         self.__valid = False
         return None
     existed_image = {}
     for image in image_files:
         existed_image[image] = 1  # 1 is just a dummy value
         if image not in status_cache:
             status_cache[image] = Status()
     to_be_deleted = []
     for image in status_cache:  # this check works when some image is deleted
         if image not in existed_image:
             to_be_deleted.append(image)
     for image in to_be_deleted:
         status_cache.pop(image)
     # TODO: saving the cache file below updates the directory mtime, so the directory always looks changed next run
     timestamp = time.ctime(os.path.getmtime(self.__location))
     save(cache_file, [timestamp, status_cache])
     return status_cache
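
The invalidation above hinges on dir_changed(), whose implementation is not shown; a plausible minimal sketch, assuming it simply compares the directory's current mtime string against the cached one:

import os
import time

def dir_changed(location, cached_timestamp):
    # True when the directory mtime no longer matches the cached string
    return time.ctime(os.path.getmtime(location)) != cached_timestamp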
Example #16
 def get_graph_file(self, pattern, url, cached_encoding):
     """output: graph_file, encoding"""
     if NA == cached_encoding:  # mean this url is not retrievable
         return NA, NA
     file_encoding = cached_encoding
     if not file_encoding:
         file_encoding = GraphFetcher.get_file_encoding(pattern)
     graph_dir = GraphFetcher.get_graph_dir(pattern)
     if not os.path.exists(graph_dir):
         try:
             os.makedirs(graph_dir)
         except OSError as e:
             error(get_msg(Msg.cannot_create_directory), str(e))
             import sys
             sys.exit()
     abs_graph_file = graph_dir + "image_" + file_encoding + ".jpg"
     if os.path.exists(abs_graph_file):
         return abs_graph_file, file_encoding
     if not self.__network_reachable:
         info(get_msg(Msg.give_up_fetch_image))
         return NA, None
     self.__has_write = True
     try:
         info(get_msg(Msg.fetch_image), url)
         try:
             web_content = urllib2.urlopen(url, timeout=10)
         except httplib.BadStatusLine:
             info(get_msg(Msg.obtain_unrecognized_status_code), url)
             return NA, NA
         fd = open(abs_graph_file, 'wb')
         fd.write(web_content.read())
         fd.close()
         assert os.path.exists(abs_graph_file)
         if os.stat(abs_graph_file).st_size <= 10240:
             info(get_msg(Msg.give_up_acquired_image_with_size), os.stat(abs_graph_file).st_size, "Bytes")
             info(get_msg(Msg.remove_image), abs_graph_file)
             os.remove(abs_graph_file)
             return NA, NA
         info(get_msg(Msg.fetch_succeed))
         return abs_graph_file, file_encoding
     except (IOError, httplib.IncompleteRead, ssl.CertificateError) as e:
         info(get_msg(Msg.failed_url), url)
         info(get_msg(Msg.error_message), str(e))
         if os.path.exists(abs_graph_file):
             fd.close()
             os.remove(abs_graph_file)
         return NA, NA
Example #17
 def crawl_by_asking_google_search(pattern, start, size, option=""):
     assert type(pattern) in [str, unicode]
     from util.global_def import get_api_key, get_cx
     api_key = get_api_key()
     cx = get_cx()
     if not api_key or not cx:
         if not Crawler._HAS_SHOW_NO_SEARCH_MSG:
             Crawler._HAS_SHOW_NO_SEARCH_MSG = True
             info(get_msg(Msg.no_search_due_to_no_api_key_and_cx))
         return [], False
     size_option = "&imgSize=" + size if size else ""
     full_option = size_option + (option if option else "")
     base_url = 'https://www.googleapis.com/customsearch/v1?key=%s&cx=%s&searchType=image&num=%d' \
                '&q=' + pattern + '&start=%d' + full_option
     request_str = base_url % (api_key, cx, G_SEARCH_PER_REQ_SIZE, start)
     urls = []
     success = True
     try:
         r = requests.get(request_str)
         res = json.loads(r.text)
         if "error" in res:
             Crawler.print_error(res["error"])
             if "This API requires billing to be enabled on the project" in res["error"]["message"]:
                 # this is the 'out of quota' message
                 Crawler.__STOP_SEARCH = True
             return urls, False
         if 'items' not in res:
             info(get_msg(Msg.cannot_fetch_image_url), "empty query")
             return urls, True  # return 'True' is okay?
         for image_info in res['items']:
             assert 'link' in image_info
             url = image_info['link']
             urls.append(url)
     except TypeError as e:  # for unhandled error...
         info(get_msg(Msg.cannot_fetch_image_url), str(e))
         success = False
     except requests.ConnectionError as e:
         info(get_msg(Msg.cannot_fetch_image_url), str(e))
         success = False
     return urls, success
Example #18
 def view(self, image_obj_list, phrase_obj_list):
     if not phrase_obj_list:
         # Python's 'mutable default argument' pitfall keeps us from using [] as the default
         phrase_obj_list = []
     if not image_obj_list:
         info(get_msg(Msg.not_any_image_specified_program_exit))
         sys.exit()
     self.setup_image_stuff(image_obj_list)
     self.setup_phrase_stuff(image_obj_list, phrase_obj_list)
     while True:
         self.timer_action(True)
         self.__root.mainloop()
         self.cancel_pending_jobs()
Example #19
 def crawl(self, pattern, size_list, option="", print_url=False):
     """output: urls, is_new_result"""
     show(get_msg(Msg.search_target), "\"" + pattern + "\"")
     key = Crawler.get_search_key(pattern, option)
     urls, size_ratio = self.get_recent_result(key)
     if urls:
         return urls, False
     if not self.__network_reachable or Crawler.__STOP_SEARCH:
         return None, False
     assert size_list and (not size_ratio or isinstance(size_ratio, dict))
     dice = Crawler.get_dice(size_list, size_ratio)
     urls = []
     next_size_ratio = {size: 0 for size in size_list}  # key: size, value: number of new result (initial with 0)
     start = {size: 1 for size in size_list}  # key: size, value: next search start offset (start from 1 by google)
     tried_size = 0
     success = True  # stays defined even if the loop below never runs
     while tried_size < TARGET_SEARCH_RESULT_SIZE:
         chosen_size = get_weighted_random_dict_key(dice)
         this_urls, success = Crawler.crawl_by_asking_google_search(pattern, start[chosen_size], chosen_size, option)
         if not success:
             break
         urls += this_urls
         new_result = self.get_this_time_new_result_num(key, this_urls)
         next_size_ratio[chosen_size] += (new_result if NA != new_result else len(this_urls))
         start[chosen_size] += G_SEARCH_PER_REQ_SIZE
         tried_size += G_SEARCH_PER_REQ_SIZE
     # 'set' filters out duplicates (not expected, but we have seen g-search return duplicated results)
     urls = list(set(urls))
     if not Crawler._HAS_SHOW_NO_SEARCH_MSG:
         info("%s:%s, %s:%i" % (
           get_msg(Msg.target), pattern,
           get_msg(Msg.acquired_url_count), len(urls)))
     if print_url:
         for url in urls:
             show(url)
     if success:
         next_size_ratio = {size: 1 if 0 == next_size_ratio[size] else next_size_ratio[size]
                            for size in next_size_ratio}
         self.cache_url(key, urls, next_size_ratio)
     return urls, success
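
This example leans on a get_weighted_random_dict_key() helper whose implementation is not shown; a plausible minimal sketch, assuming dice maps each key to a positive weight:

import random

def get_weighted_random_dict_key(dice):
    # pick a key with probability proportional to its weight
    point = random.uniform(0, sum(dice.values()))
    cumulative = 0
    for key, weight in dice.items():
        cumulative += weight
        if point <= cumulative:
            return key
    return key  # floating-point edge case: fall back to the last key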
Example #20
 def select_pattern(self):
     if self.__arbitrator.is_active():
         choice_pattern = None
         while not choice_pattern:
             choice_pattern = self.__arbitrator.arbitrate()
             if not choice_pattern:
                 show(get_msg(Msg.no_available_image_wait_10_minutes))
                 self.__root.withdraw()
                 import time
                 time.sleep(600)
         self.__root.deiconify()
         return self.__cur_image_obj_dict[choice_pattern]
     image_obj_size = len(self.__cur_image_obj_list)
     return self.__cur_image_obj_list[random.randrange(0, image_obj_size)]
Example #21
    def __parse_config(self, config_file):
        from util.config import Config

        config = Config(config_file)
        config.set_general_setting()
        image_target = config.get_setting("image", "target")
        if not image_target:
            print(get_msg(Msg.not_any_image_specified_program_exit))
            sys.exit()
        phrase_target = config.get_setting("phrase", "target")
        import glob

        self.__image_setting += glob.glob(image_target)
        self.__phrase_setting += glob.glob(phrase_target) if phrase_target else []
Example #22
 def fetch(self, pattern):
     self.__has_write = False
     new_objs, old_objs = self.get_updated_url(pattern)
     show(get_msg(Msg.total_data_count), len(new_objs) + len(old_objs))
     url = self.choose_url(new_objs, old_objs)
     if NA == url:
         return NA, NA
     image_objs = old_objs
     image_objs.update(new_objs)
     image_slot = image_objs[url]
     graph_file, new_encoding = self.get_graph_file(pattern, url, image_slot.encoding)
     new_slot = ImageSlot(image_slot.timestamp, new_encoding, image_slot.rank)
     image_objs[url] = new_slot
     if self.__has_write:
         save(GraphFetcher.get_cache_file(pattern), image_objs)
     return graph_file, GraphFetcher.get_graph_digest(graph_file, image_objs[url])
Example #23
    @staticmethod
    def get_file_encoding(pattern):
        # TODO: add a file to keep last largest number to avoid possible long glob time...
        file_list = glob.glob(GraphFetcher.get_graph_dir(pattern) + "image_*.jpg")
        largest_idx = 0
        # noinspection PyShadowingNames
        for graph_file in file_list:
            begin_pos = graph_file.find("image_")
            end_pos = graph_file.find(".jpg")
            assert -1 != begin_pos and -1 != end_pos
            begin_pos += len("image_")
            iter_idx = int(graph_file[begin_pos:end_pos])
            assert iter_idx > 0
            largest_idx = iter_idx if iter_idx > largest_idx else largest_idx
        largest_idx += 1
        return str(largest_idx)


if __name__ == '__main__':
    from util.global_def import config_action
    config_action()
    # prefix 'obj' with '_' so Python does not free imported modules before __del__ is called
    # (otherwise cPickle.dump raises "'NoneType' object has no attribute 'dump'")
    _obj = GraphFetcher()
    graph_file, digest = _obj.fetch("Inside Out")
    if NA == graph_file:
        print(get_msg(Msg.fetch_image_fail))
    else:
        print(graph_file)
Example #24
 def help_str():
     return get_msg(Msg.help_message)
Example #25
 def print_error(data):
     assert isinstance(data, dict) and "message" in data
     error(get_msg(Msg.search_engine_err_msg), data["message"])