def store_text_cap(url, title, link_guid):
    """
    Capture the text of ``url`` through Instapaper and record the result on the Asset.

    ``instapaper_capture(url, title)`` is expected to return a
    ``(bid, tdata, success)`` triple. On success the capture metadata
    (timestamp, murmur3 hash of ``tdata``, Instapaper id) is saved on the Asset
    looked up by ``link_guid``, ``tdata`` is written to ``instapaper_cap.html``
    under the asset's storage directory, and ``text_capture`` is set to that
    file name. On any failure ``text_capture`` is set to ``'failed'``.

    Args:
        url: address handed to Instapaper; also used in failure log messages.
        title: title handed to Instapaper.
        link_guid: guid of the link whose Asset should be updated.
    """
    bid, tdata, success = instapaper_capture(url, title)

    if not success:
        # Must have received something other than an HTTP 200 from Instapaper,
        # or no response object at all
        logger.info("Text (instapaper) capture failed for %s", url)
        asset = Asset.objects.get(link__guid=link_guid)
        asset.text_capture = 'failed'
        asset.save()
        return

    asset = Asset.objects.get(link__guid=link_guid)
    asset.instapaper_timestamp = datetime.datetime.now()
    asset.instapaper_hash = smhasher.murmur3_x86_128(tdata)
    asset.instapaper_id = bid
    asset.save()

    file_path = GENERATED_ASSETS_STORAGE + '/' + asset.base_storage_path
    if not os.path.exists(file_path):
        os.makedirs(file_path)

    capture_path = file_path + '/instapaper_cap.html'
    # The context manager guarantees the handle is closed (the previous code
    # referenced ``f.close`` without calling it).
    with open(capture_path, 'w') as f:
        f.write(tdata)
        # Flush Python's own buffer first; fsync only pushes what the OS has.
        f.flush()
        os.fsync(f.fileno())

    if os.path.exists(capture_path):
        asset.text_capture = 'instapaper_cap.html'
    else:
        logger.info("Text (instapaper) capture failed for %s", url)
        asset.text_capture = 'failed'
    asset.save()
def store_text_cap(url, title, link_guid):
    """
    Capture the text of ``url`` through Instapaper and store it for the Asset.

    The Asset is looked up by ``link_guid``; ``instapaper_capture(url, title)``
    is expected to return a ``(bid, tdata)`` pair. The capture metadata
    (timestamp, murmur3 hash of ``tdata``, Instapaper id) is saved on the Asset,
    ``tdata`` is written to ``instapaper_cap.html`` under the asset's storage
    directory, and ``text_capture`` is set to that file name, or to
    ``'failed'`` if the file is missing after the write.

    Args:
        url: address handed to Instapaper; also used in the failure log message.
        title: title handed to Instapaper.
        link_guid: guid of the link whose Asset should be updated.
    """
    asset = Asset.objects.get(link__guid=link_guid)
    bid, tdata = instapaper_capture(url, title)

    asset.instapaper_timestamp = datetime.datetime.now()
    asset.instapaper_hash = smhasher.murmur3_x86_128(tdata)
    asset.instapaper_id = bid
    asset.save()

    file_path = GENERATED_ASSETS_STORAGE + '/' + asset.base_storage_path
    if not os.path.exists(file_path):
        os.makedirs(file_path)

    capture_path = file_path + '/instapaper_cap.html'
    # The context manager guarantees the handle is closed (the previous code
    # referenced ``f.close`` without calling it).
    with open(capture_path, 'w') as f:
        f.write(tdata)
        # Flush Python's own buffer first; fsync only pushes what the OS has.
        f.flush()
        os.fsync(f.fileno())

    if os.path.exists(capture_path):
        asset.text_capture = 'instapaper_cap.html'
    else:
        # ``url`` is the parameter name here; ``target_url`` was undefined.
        logger.info("Text (instapaper) capture failed for %s", url)
        asset.text_capture = 'failed'
    asset.save()
def store_text_cap(link_guid, target_url, base_storage_path, title):
    """
    Capture ``target_url`` through Instapaper and record the result.

    The storage directory derived from ``base_storage_path`` is created first.
    When the capture succeeds, the returned data is written to
    ``instapaper_cap.html`` in that directory and the asset query for
    ``link_guid`` is updated with the capture file name (or ``'failed'`` if the
    file is missing), a timestamp, the murmur3 hash of the data and the
    Instapaper id. When the capture does not succeed, only ``text_capture`` is
    updated, to ``'failed'``.
    """
    # basic setup
    asset_query = get_asset_query(link_guid)
    storage_path = get_storage_path(base_storage_path)
    create_storage_dir(storage_path)

    bid, tdata, success = instapaper_capture(target_url, title)

    if not success:
        # Must have received something other than an HTTP 200 from Instapaper,
        # or no response object at all
        logger.info("Text (instapaper) capture failed for %s" % target_url)
        asset_query.update(text_capture='failed')
        return

    capture_file = storage_path + '/instapaper_cap.html'
    with open(capture_file, 'wb') as out:
        out.write(tdata)

    if not os.path.exists(capture_file):
        logger.info("Text (instapaper) capture failed for %s" % target_url)
        capture_status = 'failed'
    else:
        capture_status = 'instapaper_cap.html'

    captured_at = datetime.datetime.now()
    content_hash = smhasher.murmur3_x86_128(tdata)
    asset_query.update(
        text_capture=capture_status,
        instapaper_timestamp=captured_at,
        instapaper_hash=content_hash,
        instapaper_id=bid
    )
def render(self, context):
    """
    Return ``' (changed)'`` when a fresh Instapaper capture differs from the stored hash.

    The asset and link are resolved from ``context``, the link's submitted URL
    and title are captured again, and the murmur3 hash of the new capture (as
    a string) is compared with ``asset.instapaper_hash``. An empty string is
    returned when no hash is stored or when the hashes match.
    """
    asset = self.asset.resolve(context)
    linky = self.linky.resolve(context)
    _capture_id, capture = instapaper_capture(linky.submitted_url, linky.submitted_title)

    # Only say it's changed if we are sure it has changed.
    if asset.instapaper_hash is None:
        return ''
    fresh_hash = str(smhasher.murmur3_x86_128(capture))
    if fresh_hash == asset.instapaper_hash:
        return ''
    return ' (changed)'
def hash(self):
    """
    The full hash of the file.

    Starts from ``self.size`` and XORs in the murmur3 hash of every
    ``BIG_BLOCKSIZE`` chunk of ``self.path``, each shifted left by a counter
    that cycles through 0..16.
    """
    digest = self.size
    shift = 0
    with open(self.path, "rb") as stream:
        while True:
            chunk = stream.read(BIG_BLOCKSIZE)
            if not chunk:
                break
            chunk_hash = smhasher.murmur3_x86_128(chunk, GenericFileWrapper.seed)
            digest ^= chunk_hash << shift
            # The shift counter wraps back to 0 after reaching 16.
            if shift < 16:
                shift += 1
            else:
                shift = 0
    return digest
def hash(self):
    """
    The full hash of the file.

    The accumulator is seeded with ``self.size``; the murmur3 hash of each
    ``BIG_BLOCKSIZE`` chunk read from ``self.path`` is shifted left by a
    counter cycling through 0..16 and XORed into it.
    """
    result = self.size
    offset = 0
    with open(self.path, 'rb') as fh:
        # Binary reads return an empty bytes object at end of file.
        for block in iter(lambda: fh.read(BIG_BLOCKSIZE), b''):
            block_hash = smhasher.murmur3_x86_128(block, GenericFileWrapper.seed)
            result = result ^ (block_hash << offset)
            offset = 0 if offset >= 16 else offset + 1
    return result
def mhash(data, n=32):
    """
    Returns an N-bit hash value of provided data (string) generated with
    the Murmur3 hashing.

    Non-string input is converted with ``str()``, falling back to
    ``unicode()`` if that fails; unicode text is encoded as UTF-8 before
    hashing.

    n - is a number of bits in a final mask [0..128]. None means no masking.
    """
    if not isinstance(data, basestring):
        try:
            data = str(data)
        except Exception:
            # Narrowed from a bare ``except`` so that KeyboardInterrupt and
            # SystemExit are no longer swallowed by the fallback.
            data = unicode(data)
    if isinstance(data, unicode):
        data = data.encode('utf-8')

    value = murmur3_x86_128(data)
    if n is None:
        return value

    # Plain int literal: Python ints widen automatically, so no ``L`` suffix
    # is needed to build a mask of up to 128 bits.
    mask = (1 << n) - 1
    return value & mask
def first_block_hash(self):
    """
    Hash of the first block of the file.

    Reads up to ``SMALL_BLOCKSIZE`` bytes from ``self.path`` and returns their
    murmur3 hash computed with ``GenericFileWrapper.seed``.
    """
    with open(self.path, "rb") as stream:
        head = stream.read(SMALL_BLOCKSIZE)
        head_hash = smhasher.murmur3_x86_128(head, GenericFileWrapper.seed)
    return head_hash
def first_block_hash(self):
    """
    Hash of the leading block of the file.

    Only the first ``SMALL_BLOCKSIZE`` bytes of ``self.path`` are read; they
    are hashed with murmur3 using ``GenericFileWrapper.seed``.
    """
    seed = GenericFileWrapper.seed
    with open(self.path, 'rb') as fh:
        leading_bytes = fh.read(SMALL_BLOCKSIZE)
        return smhasher.murmur3_x86_128(leading_bytes, seed)