def encodeValue(value):
    """Best-effort conversion of ``value`` through ``smart_unicode``.

    Fallback chain:
      * ``UnicodeEncodeError`` or ``DjangoUnicodeDecodeError`` -> the error
        is printed and the result of ``smart_str(value)`` is returned;
      * any other ``Exception`` -> the error is printed and ``value`` is
        returned untouched.

    An exception raised by the ``smart_str`` fallback itself is not caught
    here and propagates to the caller.

    NOTE(review): ``smart_unicode`` / ``smart_str`` are presumably Django's
    encoding helpers -- confirm against this module's imports.
    """
    try:
        return smart_unicode(value)
    except UnicodeEncodeError as err:
        print("goose.text.encodeValue.UnicodeEncodeError: ", err)
        return smart_str(value)
    except DjangoUnicodeDecodeError as err:
        print("goose.text.encodeValue.DjangoUnicodeDecodeError: ", err)
        return smart_str(value)
    except Exception as err:
        print("goose.text.encodeValue.Exception: ", err)
        # Give the caller back exactly what was passed in.
        return value
def encodeValue(value):
    """Best-effort conversion of ``value`` through ``smart_unicode``.

    If ``smart_unicode`` raises ``UnicodeEncodeError`` or
    ``DjangoUnicodeDecodeError`` the result of ``smart_str(value)`` is
    returned instead; on any other ``Exception`` the input is returned
    unchanged. Errors raised by the ``smart_str`` fallback propagate.
    """
    try:
        return smart_unicode(value)
    except (UnicodeEncodeError, DjangoUnicodeDecodeError):
        return smart_str(value)
    except Exception:
        # Nothing worked: hand the original object back as-is.
        return value
def encodeValue(value):
    """Best-effort conversion of ``value`` through ``smart_unicode``.

    If ``smart_unicode`` raises ``UnicodeEncodeError`` or
    ``DjangoUnicodeDecodeError`` the result of ``smart_str(value)`` is
    returned instead; on any other ``Exception`` the input is returned
    unchanged. Errors raised by the ``smart_str`` fallback propagate.

    Only ``Exception`` subclasses are absorbed: ``KeyboardInterrupt``,
    ``SystemExit`` and ``GeneratorExit`` are deliberately allowed to
    propagate instead of being swallowed by a bare ``except``.
    """
    try:
        return smart_unicode(value)
    except (UnicodeEncodeError, DjangoUnicodeDecodeError):
        return smart_str(value)
    except Exception:
        # Best-effort contract: fall back to the untouched input.
        return value
def encodeValue(string):
    """Best-effort conversion of ``string`` through ``smart_unicode``.

    If ``smart_unicode`` raises ``UnicodeEncodeError`` or
    ``DjangoUnicodeDecodeError`` the result of ``smart_str(string)`` is
    returned instead; on any other ``Exception`` the input is returned
    unchanged. Errors raised by the ``smart_str`` fallback propagate.

    Only ``Exception`` subclasses are absorbed: ``KeyboardInterrupt``,
    ``SystemExit`` and ``GeneratorExit`` are deliberately allowed to
    propagate instead of being swallowed by a bare ``except``.
    """
    try:
        return smart_unicode(string)
    except (UnicodeEncodeError, DjangoUnicodeDecodeError):
        return smart_str(string)
    except Exception:
        # Best-effort contract: fall back to the untouched input.
        return string
def encodeValue(value):
    """Best-effort conversion of ``value`` through ``smart_unicode``.

    If ``smart_unicode`` raises ``UnicodeEncodeError`` the result of
    ``smart_str(value)`` is returned instead; on any other ``Exception``
    the input is returned unchanged. Errors raised by the ``smart_str``
    fallback propagate.

    Only ``Exception`` subclasses are absorbed: ``KeyboardInterrupt``,
    ``SystemExit`` and ``GeneratorExit`` are deliberately allowed to
    propagate instead of being swallowed by a bare ``except``.
    """
    try:
        return smart_unicode(value)
    except UnicodeEncodeError:
        return smart_str(value)
    except Exception:
        # Best-effort contract: fall back to the untouched input.
        return value
def get_parsing_candidate(self, url_to_crawl):
    """Build a ``ParsingCandidate`` for ``url_to_crawl``.

    Every ``'#!'`` in the URL is rewritten to ``'?_escaped_fragment_='``.
    The link hash is the MD5 hex digest of the resulting URL, followed by
    a dot and the current ``time.time()`` value.

    If hashing raises ``UnicodeEncodeError`` the URL is passed through
    ``smart_str`` first, and that converted URL is also the one stored in
    the returned candidate.
    """
    final_url = url_to_crawl
    if '#!' in final_url:
        # Rewrite hashbang fragments into the escaped-fragment query form.
        final_url = final_url.replace('#!', '?_escaped_fragment_=')

    try:
        url_digest = hashlib.md5(final_url).hexdigest()
    except UnicodeEncodeError:
        final_url = smart_str(final_url)
        url_digest = hashlib.md5(final_url).hexdigest()

    link_hash = '%s.%s' % (url_digest, time.time())
    return ParsingCandidate(final_url, link_hash)
def get_localfile_name(self, link_hash, src, config):
    """Return the local storage path for the image at ``src``.

    The file name is ``<link_hash>_<md5 hex digest of smart_str(src)>`` and
    is joined onto ``config.local_storage_path`` with ``os.path.join``.
    """
    src_digest = hashlib.md5(smart_str(src)).hexdigest()
    storage_dir = config.local_storage_path
    file_name = '%s_%s' % (link_hash, src_digest)
    return os.path.join(storage_dir, file_name)
def getLocalFileName(self, linkhash, imageSrc, config):
    """Return the local storage path for the image at ``imageSrc``.

    The result is ``config.localStoragePath``, a ``"/"``, ``linkhash``,
    the literal ``"_py_"`` and the MD5 hex digest of ``smart_str(imageSrc)``
    concatenated in that order.
    """
    src_digest = hashlib.md5(smart_str(imageSrc)).hexdigest()
    pieces = [config.localStoragePath, "/", linkhash, "_py_", src_digest]
    return "".join(pieces)