def update_url_data(self, url):
    """
    If url is not in db then it is inserted into db,
    else reevaluate the weight of the url.
    """
    # put url into database
    url_hash = utils.calc_hash(url)
    key_id = self.crawlerdb.check_db(url, url_hash)
    if key_id is None:
        self.crawlerdb.add_url(url, url_hash)
        # VERY LIKELY to add the next line of code: to initialize rank info of the url,
        # or we always get only the first url in db from the get_url() func.
        # self.crawlerdb.add_rank_info()
        if settings.DEBUG_FLAG:
            print 'insert %s, %s in update_url_data' % (url, url_hash)
        # IMPROVE(ifkite): use namedtuple
        # result_book = parse_page(url)  # returns tuple
        # ext_result_book = utils.merge_tups(result_book, new_key_id)
        # add_page(ext_result_book)
        # evalu = rank_page()
        # add_evalu(evalu)
    elif key_id > 0:
        self.update_evaluate(key_id)  # page is already in db
    else:
        self.handle_collision()
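# Every snippet in this section leans on utils.calc_hash, whose implementation
# is not shown. The sketch below is a guess at what such a helper might look
# like, assuming a hex digest over the str() form of the inputs; the variadic
# signature, module placement, and choice of SHA-256 are all assumptions, not
# the original API.
import hashlib

def calc_hash(*parts):
    """Hypothetical helper: digest the string form of each part in order."""
    m = hashlib.sha256()
    for part in parts:
        m.update(str(part).encode('utf-8'))
    return m.hexdigest()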
def __init__(self, index, data, previous_hash):
    """Builds a block, calculating its hash from the previous one."""
    self.index = index
    self.timestamp = get_block_timestamp()
    self.data = data
    self.previous_hash = previous_hash
    self.hash = calc_hash(self.index, self.timestamp, str(self.data), self.previous_hash)
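# Hypothetical usage sketch for the constructor above, assuming it lives in a
# class named Block and that get_block_timestamp() and calc_hash() are in scope
# (only their call sites appear in the original; the class name is a guess).
# Each block's hash covers its predecessor's hash, which is what links the chain.
genesis = Block(index=0, data="genesis", previous_hash="0")
nxt = Block(index=1, data={"amount": 42}, previous_hash=genesis.hash)
assert nxt.previous_hash == genesis.hash  # tampering with genesis would break this link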
def crypto(file_name, direction):
    queue.push(
        {
            "file_acted_upon": file_name,
            "action": direction,
            "old_hash": old_hash,  # presumably defined in an enclosing/global scope
            "new_hash": utils.calc_hash(file_name),
        },
        "crypto",
    )
def get_functions():
    f = lambda data: {"data": data, "hash": utils.calc_hash(data)}
    functions = {
        "aaa": f(["aaa"]),
        "bbb": f(["bbb"]),
        "ooo": f(["ooo"]),
        "foo": f(["foo"]),
        "baz": f(["baz"]),
        "bar-foo": f(["bar", utils.calc_hash(["foo"])]),
        "bar-foo-baz": f([utils.calc_hash(["bar", utils.calc_hash(["foo"])]), "baz"]),
    }
    return functions
def build_file(content, tags, _from):
    h = utils.calc_hash(content)
    return (
        h,
        {
            "data": content,
            # "tags": tags,
            # "when_modified": [utils.get_time()],
            # "when_accessed": [],
            "from": _from,
        },
    )
def get_parsers():
    parsers = {
        "parser-a": {"data": ["parse", ["text", "Language A"]]},
        "parser-b": {"data": ["parse", ["text", "Language B"]]},
        "parser-spec": {"data": ["parse", ["text", "Language Spec"]]},
    }
    for k in parsers.keys():
        parsers[k]["hash"] = utils.calc_hash(parsers[k]["data"])
    return parsers
def get_binaries():
    binaries = {}
    for s in [
        "compile-default",
        "parser-a",
        "parser-b",
        "parser-spec",
        "list",
        "print",
        "apple",
        "orange",
        "banana",
    ]:
        binaries[s] = {"data": f"[BINARY CONTENT ({s})]"}
    for k in binaries.keys():
        binaries[k]["hash"] = utils.calc_hash(binaries[k]["data"])
    return binaries
def get_sources():
    sources = {
        "parser-a": {
            "data": "this is fake source code that is parsed via 'parser-a' into a Language A parser"
        },
        "parser-b": {
            "data": "this is fake source code that is parsed via 'parser-a' into a Language B parser"
        },
        "parser-spec": {
            "data": "this is fake source code that is parsed via 'parser-a' into a Language Spec parser"
        },
        "compile-default": {
            "data": "this is fake source code that is parsed via 'parser-a' into a compiler"
        },
        "foo": {"data": "fake source code: [foo]"},
        "baz": {"data": "fake source code: [baz]"},
        "bar-foo": {"data": "fake source code: [bar [foo]]"},
        "bar-foo-baz": {"data": "fake source code: [[bar [foo]] baz]"},
        "aaa": {"data": "fake source code: [aaa]"},
        "bbb": {"data": "fake source code: [bbb]"},
        "ooo": {"data": "fake source code: [ooo]"},
    }
    for k in sources.keys():
        sources[k]["hash"] = utils.calc_hash(sources[k]["data"])
    return sources
def get_programs(functions):
    programs = {
        "apple": {"data": [{"entry": functions["aaa"]["hash"]}]},
        "orange": {"data": [{"entry": functions["ooo"]["hash"]}]},
        "banana": {"data": [{"entry": functions["bbb"]["hash"]}]},
        "parser-a": {"data": [{"entry": functions["foo"]["hash"]}]},
        "parser-b": {"data": [{"entry": functions["baz"]["hash"]}]},
        "parser-spec": {"data": [{"entry": functions["bar-foo"]["hash"]}]},
        "compile-default": {"data": [{"entry": functions["bar-foo-baz"]["hash"]}]},
    }
    for k in programs.keys():
        programs[k]["hash"] = utils.calc_hash(programs[k]["data"])
    return programs
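# Minimal sketch tying get_functions() and get_programs() together: a program's
# "entry" stores the hash of its entry function, so the function body can be
# resolved from a hash-keyed store. This illustrates the content-addressing
# pattern under that assumption; it is not code from the original repo.
functions = get_functions()
programs = get_programs(functions)
store = {v["hash"]: v["data"] for v in functions.values()}  # hash -> function body
entry_hash = programs["apple"]["data"][0]["entry"]
assert store[entry_hash] == ["aaa"]  # resolve the program's entry by its hash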
def test_mydb_check_db_case2(self):
    url = 'http://book.douban.com/subject/1863930/'
    url_hash = utils.calc_hash(url)
    self.crawlerdb.add_url(url, url_hash)
    assert self.crawlerdb.check_db(url, url_hash) > 0