Ejemplo n.º 1
0
 def set_graph_content(self, graph_file, image=None):
     if image is None:
         try:
             image = GraphViewer.get_image(graph_file)
         except IOError as e:
             error(str(e))
             assert False
     self.__root.geometry(self.__full_geom if self.__fullscreen_mode else
                          '%dx%d+0+0' % (image.size[0], image.size[1]))
     if self.__fullscreen_mode:
         resize_width, resize_height, x_pos, y_pos = self.get_adjusted_geom(image.size[0], image.size[1])
         try:
             resized = image.resize((resize_width, resize_height), Image.ANTIALIAS)
         except IOError as e:
             # 'incomplete downloaded image' may go here
             info(get_msg(Msg.fail_to_convert_image_to_fullscreen), str(e))
             GraphFetcher().handle_image(graph_file, DISCARD)
             return False
         image = resized
     self.__root.title(self.__cur_image_obj.group_name)
     tk_image_obj = ImageTk.PhotoImage(image)
     self.__tk_obj_ref = tk_image_obj
     self.__canvas.delete('all')
     self.__canvas.create_image(x_pos if self.__fullscreen_mode else 0, y_pos if self.__fullscreen_mode else 0,
                                image=tk_image_obj, anchor=Tkinter.NW)
     self.show_onscreen_help()
     self.show_onscreen_info()
     self.show_onscreen_phrase()
     return True
Ejemplo n.º 2
0
 def set_graph_content(self, graph_file, image=None):
     if image is None:
         try:
             image = GraphViewer.get_image(graph_file)
         except IOError as e:
             error("[view] %s" % str(e))
             assert False
     self.__root.geometry(
         self.__full_geom if self.__fullscreen_mode else '%dx%d+0+0' %
         (image.size[0], image.size[1]))
     if self.__fullscreen_mode:
         resize_width, resize_height, x_pos, y_pos = self.get_adjusted_geom(
             image.size[0], image.size[1])
         try:
             resized = image.resize((resize_width, resize_height),
                                    Image.ANTIALIAS)
         except IOError as e:
             # 'incomplete downloaded image' may go here
             info("fail to convert image to fullscreen: %s" % str(e))
             GraphFetcher().handle_image(graph_file, DISCARD)
             return False
         image = resized
     self.__root.title(self.__cur_image_obj.group_name)
     tk_image_obj = ImageTk.PhotoImage(image)
     self.__tk_obj_ref = tk_image_obj
     self.__canvas.delete('all')
     self.__canvas.create_image(x_pos if self.__fullscreen_mode else 0,
                                y_pos if self.__fullscreen_mode else 0,
                                image=tk_image_obj,
                                anchor=Tkinter.NW)
     self.show_onscreen_help()
     self.show_onscreen_info()
     self.show_onscreen_phrase()
     return True
Ejemplo n.º 3
0
 def set_graph(self, image_obj, graph_file=NA):
     self.__cur_image_obj = image_obj
     digest = None
     if NA == graph_file:
         graph_file, digest = GraphDirHandler(image_obj.location).get_graph() if image_obj.location else \
                              GraphFetcher(size=image_obj.size, option=image_obj.option).fetch(image_obj.pattern)
     if NA == graph_file:
         return False
     show(graph_file)
     with open(graph_file, 'rb') as f:
         try:
             image = GraphViewer.get_image(f)
         except IOError as e:
             f.close()  # close f here for we are going to delete the file below
             # some image cannot be opened (maybe it's not image format?), err msg is 'cannot identify image file'
             info(get_msg(Msg.fail_to_open_image), str(e))
             GraphFetcher().handle_image(graph_file, DELETE)
             return False
         # we met "Decompressed Data Too Large" for ~/Inside Out/Image_124.jpg...
         except ValueError as e:
             info(get_msg(Msg.fail_to_open_image), str(e))
             return False
     self.__cur_graph_file = graph_file
     self.__graph_history.append([self.__cur_image_obj, self.__cur_graph_file])
     if digest:
         digest_str = digest + "\n"
     else:
         digest_str = "%s:%s\n" % (get_msg(Msg.path), graph_file)
     self.__cur_digest = digest_str + "%s:%sx%s" % (get_msg(Msg.size), image.size[0], image.size[1])
     self.select_phrase(image_obj.pattern)
     return self.set_graph_content(graph_file, image)
Ejemplo n.º 4
0
 def set_graph(self, image_obj, graph_file=NA):
     self.__cur_image_obj = image_obj
     digest = None
     if NA == graph_file:
         graph_file, digest = GraphDirHandler(image_obj.location).get_graph() if image_obj.location else \
                              GraphFetcher(size=image_obj.size, option=image_obj.option).fetch(image_obj.pattern)
     if NA == graph_file:
         return False
     debug("[view] %s" % graph_file)
     with open(graph_file, 'rb') as f:
         try:
             image = GraphViewer.get_image(f)
         except IOError as e:
             f.close(
             )  # close f here for we are going to delete the file below
             # some image cannot be opened (maybe it's not image format?), err msg is 'cannot identify image file'
             info("fail to open image: %s" % str(e))
             GraphFetcher().handle_image(graph_file, DELETE)
             return False
         # we met "Decompressed Data Too Large" for ~/Inside Out/Image_124.jpg...
         except ValueError as e:
             info("fail to open image: %s" % str(e))
             return False
     self.__cur_graph_file = graph_file
     self.__graph_history.append(
         [self.__cur_image_obj, self.__cur_graph_file])
     if digest:
         digest_str = digest + "\n"
     else:
         digest_str = "%s:%s\n" % ("path", graph_file)
     self.__cur_digest = digest_str + "size:%sx%s" % (image.size[0],
                                                      image.size[1])
     self.select_phrase(image_obj.pattern)
     return self.set_graph_content(graph_file, image)
Ejemplo n.º 5
0
 def delete_image(self, *unused):
     if self.__cur_image_obj.location:
         return  # spec.: not support remove image that user 'specified'
     info(get_msg(Msg.remove_image), self.__cur_graph_file)
     self.__graph_history.remove([self.__cur_image_obj, self.__cur_graph_file])
     GraphFetcher.handle_image(self.__cur_graph_file, DELETE)
     self.cancel_pending_jobs()
     self.timer_action(True)
Ejemplo n.º 6
0
 def decrement_rank(self, *unused):
     info(get_msg(Msg.decrease_rank), self.__cur_graph_file)
     if self.__cur_image_obj.location:
         msg = GraphDirHandler.handle_image(self.__cur_image_obj.location, self.__cur_graph_file, DEC_RANK)
     else:
         msg = GraphFetcher.handle_image(self.__cur_graph_file, DEC_RANK)
     self.__cur_digest += "\n%s" % msg
     self.show_onscreen_info()
Ejemplo n.º 7
0
 def decrement_rank(self, *unused):
     info("decrease rank %s" % self.__cur_graph_file)
     if self.__cur_image_obj.location:
         msg = GraphDirHandler.handle_image(self.__cur_image_obj.location,
                                            self.__cur_graph_file, DEC_RANK)
     else:
         msg = GraphFetcher.handle_image(self.__cur_graph_file, DEC_RANK)
     self.__cur_digest += "\n%s" % msg
     self.show_onscreen_info()
Ejemplo n.º 8
0
 def delete_image(self, *unused):
     if self.__cur_image_obj.location:
         return  # spec.: not support remove image that user 'specified'
     info("remove image %s" % self.__cur_graph_file)
     entry = [self.__cur_image_obj, self.__cur_graph_file]
     self.__graph_history.remove(entry)
     while self.__graph_history.count(entry) > 0:
         self.__graph_history.remove(entry)
     GraphFetcher.handle_image(self.__cur_graph_file, DELETE)
     self.cancel_pending_jobs()
     self.timer_action(True)
Ejemplo n.º 9
0
 def view(self, image_obj_list, phrase_obj_list):
     if not phrase_obj_list:
         # the WTF 'mutable default argument' property makes us not have [] firstly
         phrase_obj_list = []
     if not image_obj_list:
         info(get_msg(Msg.not_any_image_specified_program_exit))
         sys.exit()
     self.setup_image_stuff(image_obj_list)
     self.setup_phrase_stuff(image_obj_list, phrase_obj_list)
     while True:
         self.timer_action(True)
         self.__root.mainloop()
         self.cancel_pending_jobs()
Ejemplo n.º 10
0
 def view(self, image_obj_list, phrase_obj_list):
     if not phrase_obj_list:
         # the WTF 'mutable default argument' property makes us not have [] firstly
         phrase_obj_list = []
     if not image_obj_list:
         info("not any image is specified, program exits")
         sys.exit()
     self.setup_image_stuff(image_obj_list)
     self.setup_phrase_stuff(image_obj_list, phrase_obj_list)
     GraphViewer.set_front()
     while True:
         self.timer_action(True)
         self.__root.mainloop()
         self.cancel_pending_jobs()
Ejemplo n.º 11
0
 def __load_or_create_status(self):
     status_cache = {}  # key: image_file, value: status
     cache_file = self.__location + get_delim() + GraphDirHandler.CACHE_FILE
     cache_existed = os.path.exists(cache_file)
     if cache_existed:
         success, cache_data = load(cache_file)
         assert success
         [timestamp, status_cache] = cache_data
         if not self.dir_changed(timestamp):
             return status_cache
         else:
             info("directory %s has changed, update cache file" %
                  self.__location)
     else:
         info("create a new cache file for directory: %s" % self.__location)
     image_files = []
     for root, _, files in os.walk(self.__location):
         assert len(root) >= 1
         if root[-1] != get_delim():
             root += get_delim()
         for base_file in files:
             basename, ext = os.path.splitext(base_file)
             if ext.replace(".",
                            "") in GraphDirHandler.RECOGNIZED_IMAGE_EXT:
                 image_files.append((root + base_file).replace(
                     self.__location + get_delim(), ""))
     if not image_files:
         if cache_existed:
             os.remove(cache_file)
         self.__valid = False
         return None
     existed_image = {}
     for image in image_files:
         existed_image[image] = 1  # 1 is just a dummy value
         if image not in status_cache:
             status_cache[image] = Status()
     to_be_deleted = []
     for image in status_cache:  # this check works when some image is deleted
         if image not in existed_image:
             to_be_deleted.append(image)
     for image in to_be_deleted:
         status_cache.pop(image)
     # TODO: this makes an 'always' has-changed 2nd time image
     timestamp = time.ctime(os.path.getmtime(self.__location))
     save(cache_file, [timestamp, status_cache])
     return status_cache
Ejemplo n.º 12
0
 def __load_or_create_status(self):
     status_cache = {}  # key: image_file, value: status
     cache_file = self.__location + get_delim() + GraphDirHandler.CACHE_FILE
     cache_existed = os.path.exists(cache_file)
     if cache_existed:
         success, cache_data = load(cache_file)
         assert success
         [timestamp, status_cache] = cache_data
         if not self.dir_changed(timestamp):
             return status_cache
         else:
             info(get_msg(Msg.directory), self.__location, get_msg(Msg.has_changed_update_cache_file))
     else:
         info("%s%s" % (get_msg(Msg.create_new_cache_file_for_directory), self.__location))
     image_files = []
     for root, _, files in os.walk(self.__location):
         assert len(root) >= 1
         if root[-1] != get_delim():
             root += get_delim()
         for base_file in files:
             basename, ext = os.path.splitext(base_file)
             if ext.replace(".", "") in GraphDirHandler.RECOGNIZED_IMAGE_EXT:
                 image_files.append((root + base_file).replace(self.__location, ""))
     if not image_files:
         if cache_existed:
             os.remove(cache_file)
         self.__valid = False
         return None
     existed_image = {}
     for image in image_files:
         existed_image[image] = 1  # 1 is just a dummy value
         if image not in status_cache:
             status_cache[image] = Status()
     to_be_deleted = []
     for image in status_cache:  # this check works when some image is deleted
         if image not in existed_image:
             to_be_deleted.append(image)
     for image in to_be_deleted:
         status_cache.pop(image)
     # TODO: this makes an 'always' has-changed 2nd time image
     timestamp = time.ctime(os.path.getmtime(self.__location))
     save(cache_file, [timestamp, status_cache])
     return status_cache
Ejemplo n.º 13
0
 def crawl(self, pattern, size_list, option="", print_url=False):
     """output: urls, is_new_result"""
     show(get_msg(Msg.search_target), "\"" + pattern + "\"")
     key = Crawler.get_search_key(pattern, option)
     urls, size_ratio = self.get_recent_result(key)
     if urls:
         return urls, False
     if not self.__network_reachable or Crawler.__STOP_SEARCH:
         return None, False
     assert size_list and (not size_ratio or isinstance(size_ratio, dict))
     dice = Crawler.get_dice(size_list, size_ratio)
     urls = []
     next_size_ratio = {size: 0 for size in size_list}  # key: size, value: number of new result (initial with 0)
     start = {size: 1 for size in size_list}  # key: size, value: next search start offset (start from 1 by google)
     tried_size = 0
     while tried_size < TARGET_SEARCH_RESULT_SIZE:
         chosen_size = get_weighted_random_dict_key(dice)
         this_urls, success = Crawler.crawl_by_asking_google_search(pattern, start[chosen_size], chosen_size, option)
         if not success:
             break
         urls += this_urls
         new_result = self.get_this_time_new_result_num(key, this_urls)
         next_size_ratio[chosen_size] += (new_result if NA != new_result else len(this_urls))
         start[chosen_size] += G_SEARCH_PER_REQ_SIZE
         tried_size += G_SEARCH_PER_REQ_SIZE
     # 'set' to filter out duplicated item (though not expected, but we found g-search may give duplicated result)
     urls = list(set(urls))
     if not Crawler._HAS_SHOW_NO_SEARCH_MSG:
         info("%s:%s, %s:%i" % (
           get_msg(Msg.target), pattern,
           get_msg(Msg.acquired_url_count), len(urls)))
     if print_url:
         for url in urls:
             show(url)
     if success:
         next_size_ratio = {size: 1 if 0 == next_size_ratio[size] else next_size_ratio[size]
                            for size in next_size_ratio}
         self.cache_url(key, urls, next_size_ratio)
     return urls, success
Ejemplo n.º 14
0
 def crawl(self, pattern, size_list, option="", print_url=False):
     """output: urls, is_new_result"""
     debug("[search] search target: \"%s\"" % pattern)
     key = Crawler.get_search_key(pattern, option)
     urls, size_ratio = self.get_recent_result(key)
     if urls:
         return urls, False
     if not self.__network_reachable or Crawler.__STOP_SEARCH:
         return None, False
     assert size_list and (not size_ratio or isinstance(size_ratio, dict))
     dice = Crawler.get_dice(size_list, size_ratio)
     urls = []
     next_size_ratio = {size: 0 for size in size_list}  # key: size, value: number of new result (initial with 0)
     start = {size: 1 for size in size_list}  # key: size, value: next search start offset (start from 1 by google)
     tried_size = 0
     while tried_size < get_search_size():
         chosen_size = get_weighted_random_dict_key(dice)
         this_urls, success = Crawler.crawl_by_asking_google_search(pattern, start[chosen_size], chosen_size, option)
         if not success:
             break
         urls += this_urls
         new_result = self.get_this_time_new_result_num(key, this_urls)
         next_size_ratio[chosen_size] += (new_result if NA != new_result else len(this_urls))
         start[chosen_size] += G_SEARCH_PER_REQ_SIZE
         tried_size += G_SEARCH_PER_REQ_SIZE
     # 'set' to filter out duplicated item (though not expected, but we found g-search may give duplicated result)
     urls = list(set(urls))
     if not Crawler._HAS_SHOW_NO_SEARCH_MSG:
         info("target:%s, acquired url count:%i" % (pattern, len(urls)))
     if print_url:
         for url in urls:
             debug("[search] %s" % url)
     if success:
         next_size_ratio = {size: 1 if 0 == next_size_ratio[size] else next_size_ratio[size]
                            for size in next_size_ratio}
         self.cache_url(key, urls, next_size_ratio)
     return urls, success
Ejemplo n.º 15
0
 def crawl_by_asking_google_search(pattern, start, size, option=""):
     assert type(pattern) in [str, unicode]
     from util.global_def import get_api_key, get_cx
     api_key = get_api_key()
     cx = get_cx()
     if not api_key or not cx:
         if not Crawler._HAS_SHOW_NO_SEARCH_MSG:
             Crawler._HAS_SHOW_NO_SEARCH_MSG = True
             info(get_msg(Msg.no_search_due_to_no_api_key_and_cx))
         return [], False
     size_option = "&imgSize=" + size if size else ""
     full_option = size_option + (option if option else "")
     base_url = 'https://www.googleapis.com/customsearch/v1?key=%s&cx=%s&searchType=image&num=%d' \
                '&q=' + pattern + '&start=%d' + full_option
     request_str = base_url % (api_key, cx, G_SEARCH_PER_REQ_SIZE, start)
     urls = []
     success = True
     try:
         r = requests.get(request_str)
         res = json.loads(r.text)
         if "error" in res:
             Crawler.print_error(res["error"])
             if "This API requires billing to be enabled on the project" in res["error"]["message"]:
                 # this is the 'out of quota' message
                 Crawler.__STOP_SEARCH = True
             return urls, False
         if 'items' not in res:
             info(get_msg(Msg.cannot_fetch_image_url), "empty query")
             return urls, True  # return 'True' is okay?
         for image_info in res['items']:
             assert 'link' in image_info
             url = image_info['link']
             urls.append(url)
     except TypeError as e:  # for unhandled error...
         info(get_msg(Msg.cannot_fetch_image_url), str(e))
         success = False
     except requests.ConnectionError as e:
         info(get_msg(Msg.cannot_fetch_image_url), str(e))
         success = False
     return urls, success
Ejemplo n.º 16
0
 def crawl_by_asking_google_search(pattern, start, size, option=""):
     assert type(pattern) in [str, unicode]
     from util.global_def import get_api_key, get_cx
     api_key = get_api_key()
     cx = get_cx()
     if not api_key or not cx:
         if not Crawler._HAS_SHOW_NO_SEARCH_MSG:
             Crawler._HAS_SHOW_NO_SEARCH_MSG = True
             info("as api_key and cx for Google Custom Search is not available, no image search will be issued")
         return [], False
     size_option = "&imgSize=" + size if size else ""
     full_option = size_option + (option if option else "")
     base_url = 'https://www.googleapis.com/customsearch/v1?key=%s&cx=%s&searchType=image&num=%d' \
                '&q=' + pattern + '&start=%d' + full_option
     request_str = base_url % (api_key, cx, G_SEARCH_PER_REQ_SIZE, start)
     urls = []
     success = True
     try:
         r = requests.get(request_str)
         res = json.loads(r.text)
         if "error" in res:
             Crawler.print_error(res["error"])
             if "This API requires billing to be enabled on the project" in res["error"]["message"]:
                 # this is the 'out of quota' message
                 Crawler.__STOP_SEARCH = True
             return urls, False
         if 'items' not in res:
             info("cannot fetch newer image url list: empty query")
             return urls, True  # return 'True' is okay?
         for image_info in res['items']:
             assert 'link' in image_info
             url = image_info['link']
             urls.append(url)
     except TypeError as e:  # for unhandled error...
         info("cannot fetch newer image url list: %s" % str(e))
         success = False
     except requests.ConnectionError as e:
         info("cannot fetch newer image url list: %s" % str(e))
         success = False
     return urls, success
Ejemplo n.º 17
0
 def get_graph_file(self, pattern, url, cached_encoding):
     """output: graph_file, encoding"""
     if NA == cached_encoding:  # mean this url is not retrievable
         return NA, NA
     file_encoding = cached_encoding
     if not file_encoding:
         file_encoding = GraphFetcher.get_file_encoding(pattern)
     graph_dir = GraphFetcher.get_graph_dir(pattern)
     if not os.path.exists(graph_dir):
         try:
             mkdir_p(graph_dir)
         except OSError as e:
             error("[fetch] cannot create program directory, program exits:")
             error(str(e))
             sys.exit()
     abs_graph_file = graph_dir + "image_" + file_encoding + ".jpg"
     if os.path.exists(abs_graph_file):
         return abs_graph_file, file_encoding
     if not self.__network_reachable:
         info("give up fetching image (due to no network connection):")
         return NA, None
     self.__has_write = True
     try:
         info("fetch image: %s" % url)
         try:
             web_content = urllib2.urlopen(url, timeout=10)
         except httplib.BadStatusLine:
             info("give up fetching image (due to no network connection): %s" % url)
             return NA, NA
         fd = open(abs_graph_file, 'wb')
         fd.write(web_content.read())
         fd.close()
         assert os.path.exists(abs_graph_file)
         if os.stat(abs_graph_file).st_size <= 10240:
             info("give up acquired image with size: %s Bytes" % os.stat(abs_graph_file).st_size)
             info("remove image: %s" % abs_graph_file)
             os.remove(abs_graph_file)
             return NA, NA
         info("fetch succeeded")
         return abs_graph_file, file_encoding
     except (IOError, httplib.IncompleteRead, ssl.CertificateError) as e:
         info("failed url: %s" % url)
         info("error: %s" % str(e))
         if os.path.exists(abs_graph_file):
             fd.close()
             os.remove(abs_graph_file)
         return NA, NA
Ejemplo n.º 18
0
 def get_graph_file(self, pattern, url, cached_encoding):
     """output: graph_file, encoding"""
     if NA == cached_encoding:  # mean this url is not retrievable
         return NA, NA
     file_encoding = cached_encoding
     if not file_encoding:
         file_encoding = GraphFetcher.get_file_encoding(pattern)
     graph_dir = GraphFetcher.get_graph_dir(pattern)
     if not os.path.exists(graph_dir):
         try:
             os.makedirs(graph_dir)
         except OSError as e:
             error(get_msg(Msg.cannot_create_directory), str(e))
             import sys
             sys.exit()
     abs_graph_file = graph_dir + "image_" + file_encoding + ".jpg"
     if os.path.exists(abs_graph_file):
         return abs_graph_file, file_encoding
     if not self.__network_reachable:
         info(get_msg(Msg.give_up_fetch_image))
         return NA, None
     self.__has_write = True
     try:
         info(get_msg(Msg.fetch_image), url)
         try:
             web_content = urllib2.urlopen(url, timeout=10)
         except httplib.BadStatusLine:
             info(get_msg(Msg.obtain_unrecognized_status_code), url)
             return NA, NA
         fd = open(abs_graph_file, 'wb')
         fd.write(web_content.read())
         fd.close()
         assert os.path.exists(abs_graph_file)
         if os.stat(abs_graph_file).st_size <= 10240:
             info(get_msg(Msg.give_up_acquired_image_with_size), os.stat(abs_graph_file).st_size, "Bytes")
             info(get_msg(Msg.remove_image), abs_graph_file)
             os.remove(abs_graph_file)
             return NA, NA
         info(get_msg(Msg.fetch_succeed))
         return abs_graph_file, file_encoding
     except (IOError, httplib.IncompleteRead, ssl.CertificateError) as e:
         info(get_msg(Msg.failed_url), url)
         info(get_msg(Msg.error_message), str(e))
         if os.path.exists(abs_graph_file):
             fd.close()
             os.remove(abs_graph_file)
         return NA, NA