def classification(self):
    """Build the per-category model tables and write two rankings for each.

    For every category in ``self.r1.type_dic`` the keys that also occur in
    ``self.dic`` are collected as ``key -> (r1 value, r2 value)`` and stored
    in ``self.type_dic[category]``.  The table is then written through
    ``rw.writeFile`` twice, in descending order:

    * ``<fvq_dir>/<category>.txt`` ordered by the r1 value,
    * ``<num_dir>/<category>.txt`` ordered by the r2 value.

    Each output line is ``key<TAB>str((r1 value, r2 value))``.
    """
    for category in self.r1.type_dic:
        dic_fvq = self.r1.type_dic[category]
        dic_num = self.r2.type_dic[category]
        # NOTE(review): the min_ratio1/min_ratio2 threshold test was disabled
        # (commented out) in the previous revision, so the thresholds are no
        # longer computed here; only membership in self.dic filters the keys.
        selected = {
            key: (dic_fvq[key], dic_num[key])
            for key in dic_fvq
            if key in self.dic
        }
        self.type_dic[category] = selected
        # Tuple slot 0 ranks the file in fvq_dir, slot 1 the file in num_dir.
        for column, directory in ((0, self.fvq_dir), (1, self.num_dir)):
            ranked = sorted(selected.items(), key=lambda item: item[1][column])
            # Reversing the ascending sort (rather than reverse=True) keeps
            # the established order of entries whose sort keys are equal.
            ranked.reverse()
            content = "".join(
                query + '\t' + str(freq) + '\n' for query, freq in ranked
            )
            rw.writeFile(directory + '/' + category + '.txt', content)
def sort_files(self):
    """Process every matching query log and write the aggregate report.

    Each name returned by ``os.listdir(self.src_dir)`` that ends with
    ``self.suffix`` is passed to ``self.sort_file``.  The per-file summaries,
    the overall totals and one section per entry of ``self.targets`` are
    concatenated and written through ``rw.writeFile`` to
    ``<result_dir>/result.txt``.

    A ratio whose denominator is zero is reported as 0.000, so an empty
    source directory (or logs without long queries) still yields a report
    instead of a ZeroDivisionError.
    """
    def ratio(part, whole):
        # Guard the empty case; float() keeps true division on Python 2.
        if not whole:
            return 0.0
        return float(part) / whole

    result_path = self.result_dir + "/result.txt"
    sections = [
        self.sort_file(filename) + "\n"
        for filename in os.listdir(self.src_dir)
        if filename.endswith(self.suffix)
    ]

    sections.append(
        ("Queries total number: %d\n" % self.total_num)
        + ("Queries over %d bytes in total: %d\n"
           % (self.min_char, self.total_num_sorted))
        + ("Queries total frequency: %d\n" % self.total_freq)
        + ("Long queries total frequency: %d\n" % self.total_freq_sorted)
        + ("Long query ratio: %0.3f\n"
           % ratio(self.total_num_sorted, self.total_num))
        + ("Long query frequency ratio: %0.3f\n"
           % ratio(self.total_freq_sorted, self.total_freq))
    )
    sections.append("\n\n")

    # The counters are still indexed by position so that a counter list
    # shorter than self.targets keeps raising IndexError as before.
    for i, target in enumerate(self.targets):
        sections.append(
            ("Long Queries: %d\n" % self.total_num_sorted)
            + ("Number of '%s': %d\n" % (target, self.target_num_l[i]))
            + ("Long Queries frequency: %d\n" % self.total_freq_sorted)
            + ("Frequency of '%s': %d\n" % (target, self.target_freq_l[i]))
            + ("Ratio: %0.3f\n"
               % ratio(self.target_num_l[i], self.total_num_sorted))
            + ("Frequency Ratio: %0.3f\n"
               % ratio(self.target_freq_l[i], self.total_freq_sorted))
            + "\n"
        )
    rw.writeFile(result_path, "".join(sections))
def rank(self):
    """Keep the leading share of sample words that appear in the dictionary.

    The dictionary file (``self.dict_dir``) contributes the first
    tab-separated field of each line; the sample file (``self.res_dir``)
    holds ``word:count`` lines.  Sample lines whose word is in the
    dictionary are kept in file order and the first
    ``int(round(len(kept) * self.ratio))`` of them are used: each word is
    stored in ``self.dic_word`` with its count as an int, and the words are
    written one per line through ``rw.writeFile`` to ``self.rank_dir``.
    """
    # [0:-1] discards the piece that follows the last newline of each file.
    dict_lines = rw.readFile(self.dict_dir).split("\n")[0:-1]
    sample_lines = rw.readFile(self.res_dir).split("\n")[0:-1]

    known_words = set(entry.split("\t")[0] for entry in dict_lines)
    candidates = [
        entry for entry in sample_lines
        if entry.split(":")[0] in known_words
    ]

    cutoff = int(round(len(candidates) * self.ratio))
    kept = []
    for entry in candidates[:cutoff]:
        fields = entry.split(':')
        kept.append(fields[0] + '\n')
        self.dic_word[fields[0]] = int(fields[1])
    rw.writeFile(self.rank_dir, "".join(kept))
def init_result(self):
    """Write the ``word:count`` ranking produced by ``self.analyze()``.

    The items of the mapping returned by ``self.analyze()`` are ordered by
    count, highest first; entries with a count below ``self.min_num`` are
    left out.  The text is written through ``rw.writeFile`` to
    ``self.res_dir`` and also returned.
    """
    ranked = sorted(self.analyze().items(), key=lambda pair: pair[1])
    # Reverse the ascending sort instead of passing reverse=True so that
    # entries with equal counts keep the order they had before.
    ranked.reverse()
    content = "".join(
        word + ':' + str(num) + '\n'
        for word, num in ranked
        if num >= self.min_num
    )
    rw.writeFile(self.res_dir, content)
    return content
def update(self):
    """Merge ``self.dic_score`` into ``self.dic_num`` and write the ranking.

    After the merge ``self.dic`` refers to the same dictionary as
    ``self.dic_num``.  Its items are written as ``key<TAB>value`` lines,
    highest value first, through ``rw.writeFile`` to
    ``<res_dir>/General.txt``.
    """
    self.dic_num.update(self.dic_score)
    self.dic = self.dic_num

    target = self.res_dir + '/General.txt'
    ranked = sorted(self.dic_num.items(), key=lambda pair: pair[1])
    # Reversed ascending sort: ties come out in the same order as before.
    ranked.reverse()
    rows = [query + '\t' + str(num) + '\n' for query, num in ranked]
    rw.writeFile(target, "".join(rows))
def update(self):
    """Fold ``self.dic_score`` into ``self.dic_num`` and dump the result.

    ``self.dic`` is rebound to ``self.dic_num`` once the merge is done.  The
    merged items are rendered as ``key<TAB>value`` lines in descending value
    order and handed to ``rw.writeFile`` for ``<res_dir>/General.txt``.
    """
    self.dic_num.update(self.dic_score)
    self.dic = self.dic_num

    out_path = self.res_dir + '/General.txt'
    ordered = sorted(self.dic_num.items(), key=lambda entry: entry[1])
    # Ascending sort then reverse, so equal values keep their former order.
    ordered.reverse()
    text = "".join(query + '\t' + str(num) + '\n' for query, num in ordered)
    rw.writeFile(out_path, text)
def rank(self):
    """Write and return the ranking for the configured mode.

    ``self.mode`` selects the source mapping: ``"sqv"`` uses
    ``self.addToDict_fvq()`` and ``"num"`` uses ``self.addToDict_num()``.
    The items are rendered as ``key<TAB>value`` lines, highest value first,
    written through ``rw.writeFile`` to ``self.res_dir`` and returned.

    Raises:
        ValueError: if ``self.mode`` is neither ``"sqv"`` nor ``"num"``.
            Previously this case failed later with an UnboundLocalError.
    """
    if self.mode == "sqv":
        counts = self.addToDict_fvq()
    elif self.mode == "num":
        counts = self.addToDict_num()
    else:
        raise ValueError("unsupported mode: %r" % (self.mode,))

    ranked = sorted(counts.items(), key=lambda item: item[1])
    # Reversing the ascending sort (not reverse=True) keeps the established
    # output order of entries that share the same value.
    ranked.reverse()
    content = "".join(query + '\t' + str(freq) + '\n' for query, freq in ranked)
    rw.writeFile(self.res_dir, content)
    return content
def filter(self,dic,filename):
    """Score every key of ``dic`` and write the scores to a file.

    ``self.evaluate(key, dic)`` is called once per key.  The results are
    ordered by the first element of each score, highest first, and written
    as ``key<TAB>str(score)`` lines through ``rw.writeFile`` to
    ``<res_dir>/<filename>.txt``.

    Returns:
        A tuple ``(scores, ranked)``: the key -> score dictionary and the
        list of ``(key, score)`` pairs in the order they were written.
    """
    scores = {key: self.evaluate(key, dic) for key in dic}
    out_path = self.res_dir + '/' + filename + '.txt'

    ranked = sorted(scores.items(), key=lambda pair: pair[1][0])
    # Ascending sort followed by reverse keeps tie order as it was.
    ranked.reverse()
    rows = [query + '\t' + str(score) + '\n' for query, score in ranked]
    rw.writeFile(out_path, "".join(rows))
    return (scores, ranked)
def filter(self, dic, filename):
    """Evaluate each key of ``dic`` and persist the ranked scores.

    Every key is scored with ``self.evaluate(key, dic)``.  The pairs are
    sorted on the first element of the score in descending order and written
    as ``key<TAB>str(score)`` lines via ``rw.writeFile`` to
    ``<res_dir>/<filename>.txt``.

    Returns:
        ``(scores, ranked)`` -- the score dictionary and the ordered list of
        ``(key, score)`` pairs.
    """
    scores = {key: self.evaluate(key, dic) for key in dic}
    destination = self.res_dir + '/' + filename + '.txt'

    ranked = sorted(scores.items(), key=lambda entry: entry[1][0])
    # Reverse of the ascending sort; equal scores keep their former order.
    ranked.reverse()
    text = "".join(query + '\t' + str(score) + '\n' for query, score in ranked)
    rw.writeFile(destination, text)
    return (scores, ranked)
def rank(self):
    """Write and return the ranking for the configured mode.

    ``self.mode`` selects the source mapping: ``"sqv"`` uses
    ``self.addToDict_fvq()`` and ``"num"`` uses ``self.addToDict_num()``.
    The items are rendered as ``key<TAB>value`` lines, highest value first,
    written through ``rw.writeFile`` to ``self.res_dir`` and returned.

    Raises:
        ValueError: if ``self.mode`` is neither ``"sqv"`` nor ``"num"``.
            Previously this case failed later with an UnboundLocalError.
    """
    if self.mode == "sqv":
        counts = self.addToDict_fvq()
    elif self.mode == "num":
        counts = self.addToDict_num()
    else:
        raise ValueError("unsupported mode: %r" % (self.mode,))

    ranked = sorted(counts.items(), key=lambda item: item[1])
    # Reversing the ascending sort (not reverse=True) keeps the established
    # output order of entries that share the same value.
    ranked.reverse()
    content = "".join(query + '\t' + str(freq) + '\n' for query, freq in ranked)
    rw.writeFile(self.res_dir, content)
    return content
def find_NE(self, filename):
    """Run every dictionary key over one file and keep the bracketed lines.

    The text of ``<src_dir>/<filename>`` is passed through
    ``self.find_key(key, text)`` once for each key of ``self.dic``.  Lines of
    the resulting text that contain a ``[...]`` group are then written, one
    per line, through ``rw.writeFile`` to ``<res_dir>/<filename>`` and
    returned as a single string.
    """
    src_path = self.src_dir + '/' + filename
    res_path = self.res_dir + '/' + filename

    text = rw.readFile(src_path)
    for key in self.dic:
        text = self.find_key(key, text)

    # Matches whole lines that hold at least one non-empty [...] group.
    bracketed = re.compile(r'.*\[.+\].*').findall(text)
    content = "".join(hit + '\n' for hit in bracketed)
    rw.writeFile(res_path, content)
    return content
def classification(self):
    """Group ``self.dic_num`` by category and write one ranking per category.

    For each category in ``self.m.type_dic`` the entries of ``self.dic_num``
    whose key contains the category are stored in
    ``self.type_dic[category]`` and written as ``key<TAB>value`` lines,
    highest value first, through ``rw.writeFile`` to
    ``<res_dir>/<category>.txt``.
    """
    for category in self.m.type_dic:
        members = {
            key: self.dic_num[key]
            for key in self.dic_num
            if category in key
        }
        self.type_dic[category] = members

        ranked = sorted(members.items(), key=lambda pair: pair[1])
        # Ascending sort then reverse, so ties keep their former order.
        ranked.reverse()
        out_path = self.res_dir + '/' + category + '.txt'
        rows = [query + '\t' + str(num) + '\n' for query, num in ranked]
        rw.writeFile(out_path, "".join(rows))
def sync(self):
    """Rank ``self.query_dic``, apply the dictionary keys and keep tagged lines.

    The items of ``self.query_dic`` are rendered as ``key<TAB>value`` lines,
    highest value first.  That text is passed through
    ``self.find_key(key, text)`` for every key of ``self.dic``; the lines
    that then contain a ``[...]`` group are written through ``rw.writeFile``
    to ``self.total_dir`` and returned as one string.
    """
    ranked = sorted(self.query_dic.items(), key=lambda pair: pair[1])
    # Reverse of the ascending sort; equal values keep their former order.
    ranked.reverse()
    content = "".join(query + '\t' + str(freq) + '\n' for query, freq in ranked)

    for key in self.dic:
        content = self.find_key(key, content)

    # Matches whole lines that hold at least one non-empty [...] group.
    tagged = re.compile(r'.*\[.+\].*').findall(content)
    txt = "".join(hit + '\n' for hit in tagged)
    rw.writeFile(self.total_dir, txt)
    return txt
def classification(self):
    """Split ``self.dic_num`` into per-category tables and write each one.

    A key belongs to a category of ``self.m.type_dic`` when the category is
    contained in the key.  Each table is saved in
    ``self.type_dic[category]`` and dumped as ``key<TAB>value`` lines in
    descending value order via ``rw.writeFile`` to
    ``<res_dir>/<category>.txt``.
    """
    for category in self.m.type_dic:
        table = {
            key: self.dic_num[key]
            for key in self.dic_num
            if category in key
        }
        self.type_dic[category] = table

        ordered = sorted(table.items(), key=lambda entry: entry[1])
        # Reversing an ascending sort leaves tie order exactly as before.
        ordered.reverse()
        destination = self.res_dir + '/' + category + '.txt'
        text = "".join(query + '\t' + str(num) + '\n' for query, num in ordered)
        rw.writeFile(destination, text)
def segment(self,filename):
    """Segment the queries of one log file and write the non-empty results.

    The first ``self.num`` newline-separated lines of
    ``<src_dir>/<filename>`` are processed.  The first tab-separated field
    of each line is passed to ``self.run``; when the result is not the empty
    string, ``query<TAB>segmented<TAB>second field`` is added to the output.
    The output is written through ``rw.writeFile`` to
    ``<res_dir>/<filename>``.  Progress is printed to stdout.
    """
    print("********************")
    print("Segmenting File: %s" % filename)
    src_path = self.src_dir + '/' + filename
    res_path = self.res_dir + '/' + filename

    entries = rw.readFile(src_path).split('\n')[0:self.num]
    rows = []
    for entry in entries:
        fields = entry.split('\t')
        segmented = self.run(fields[0])
        if segmented == "":
            continue
        print("Segmenting: %s" % fields[0])
        rows.append(fields[0] + '\t' + segmented + '\t' + fields[1] + '\n')
    rw.writeFile(res_path, "".join(rows))
def classification(self, nef):
    """Write one ranking file per category of ``nef.dic``.

    For each category the entries of ``self.dic`` whose key contains the
    category are collected.  The collection is stored in
    ``self.type_dic[category]`` only when that category is not registered
    yet, and is always written as ``key<TAB>value`` lines, highest value
    first, through ``rw.writeFile`` to ``<class_dir>/<category>.txt``.
    """
    for category in nef.dic:
        out_path = self.class_dir + '/' + category + '.txt'
        members = {
            key: self.dic[key]
            for key in self.dic
            if category in key
        }
        # An existing table for this category is left untouched.
        if category not in self.type_dic:
            self.type_dic[category] = members

        ranked = sorted(members.items(), key=lambda pair: pair[1])
        # Ascending sort then reverse, so ties keep their former order.
        ranked.reverse()
        rows = [query + '\t' + str(freq) + '\n' for query, freq in ranked]
        rw.writeFile(out_path, "".join(rows))
def classification(self, nef):
    """Dump the entries of ``self.dic`` grouped by the categories of ``nef.dic``.

    An entry belongs to a category when the category is contained in its
    key.  Each group is registered in ``self.type_dic`` unless the category
    is already present there, and is written in descending value order as
    ``key<TAB>value`` lines via ``rw.writeFile`` to
    ``<class_dir>/<category>.txt``.
    """
    for category in nef.dic:
        destination = self.class_dir + '/' + category + '.txt'
        group = {
            key: self.dic[key]
            for key in self.dic
            if category in key
        }
        # Do not replace a table that was registered earlier.
        if category not in self.type_dic:
            self.type_dic[category] = group

        ordered = sorted(group.items(), key=lambda entry: entry[1])
        # Reversing an ascending sort leaves tie order exactly as before.
        ordered.reverse()
        text = "".join(query + '\t' + str(freq) + '\n' for query, freq in ordered)
        rw.writeFile(destination, text)
def __init__(self, pid, neighbor):
    """Store the peer identity, reset the list files and start the workers.

    Args:
        pid: identifier of this peer.
        neighbor: neighbor information of this peer; passed unchanged to
            every worker.

    ``msg_list.txt`` and ``req_list.txt`` are overwritten with an empty
    string, then a ``server.Server``, a ``pushupdate.Pushupdate`` and a
    ``client.Client`` are created and started, in that order.
    """
    self.pid = pid
    self.neighbor = neighbor

    # Begin with empty message and request lists.
    for list_file in ('msg_list.txt', 'req_list.txt'):
        rw.writeFile(list_file, '')

    # Each worker is started before the next one is constructed.
    peer_server = server.Server(self.pid, self.neighbor)
    peer_server.start()

    update_pusher = pushupdate.Pushupdate(self.pid, self.neighbor)
    update_pusher.start()

    peer_client = client.Client(self.pid, self.neighbor)
    peer_client.start()
def sub(self,filename):
    """Substitute the queries of one log file and write the non-empty results.

    The text of ``<src_dir>/<filename>`` is first passed through
    ``pre.sort_txt(text, [0,1], 20)``; the first ``self.num``
    newline-separated lines of the result are processed.  The first
    tab-separated field of each line goes to ``self.run``; when the result
    is not the empty string, ``query<TAB>substituted<TAB>second field`` is
    added to the output, which is written through ``rw.writeFile`` to
    ``<res_dir>/<filename>``.  Progress is printed to stdout.
    """
    print("********************")
    print("Substituting File: %s" % filename)
    src_path = self.src_dir + '/' + filename
    res_path = self.res_dir + '/' + filename

    prepared = pre.sort_txt(rw.readFile(src_path), [0, 1], 20)
    rows = []
    for entry in prepared.split('\n')[0:self.num]:
        fields = entry.split('\t')
        substituted = self.run(fields[0])
        if substituted == "":
            continue
        print("Substituting: %s" % fields[0])
        rows.append(fields[0] + '\t' + substituted + '\t' + fields[1] + '\n')
    rw.writeFile(res_path, "".join(rows))
def sub(self, filename):
    """Apply ``self.run`` to each query of a log file and save the hits.

    ``<src_dir>/<filename>`` is read and handed to
    ``pre.sort_txt(text, [0, 1], 20)``.  Of the resulting text only the
    first ``self.num`` newline-separated lines are used.  For each line the
    first tab-separated field is given to ``self.run``; a result other than
    the empty string produces the row
    ``query<TAB>substituted<TAB>second field``.  All rows are written via
    ``rw.writeFile`` to ``<res_dir>/<filename>``.  Progress goes to stdout.
    """
    print("********************")
    print("Substituting File: %s" % filename)
    src_path = self.src_dir + '/' + filename
    res_path = self.res_dir + '/' + filename

    sorted_text = pre.sort_txt(rw.readFile(src_path), [0, 1], 20)
    pieces = []
    for record in sorted_text.split('\n')[0:self.num]:
        columns = record.split('\t')
        replaced = self.run(columns[0])
        if replaced == "":
            continue
        print("Substituting: %s" % columns[0])
        pieces.append(columns[0] + '\t' + replaced + '\t' + columns[1] + '\n')
    rw.writeFile(res_path, "".join(pieces))
def sort_file(self,filename):
    """Extract the long queries of one log file and return its summary.

    ``<src_dir>/<filename>`` is split on newlines.  A line takes part only
    when it has more than one tab-separated field; its last field is read as
    an integer frequency.  Lines whose first field is at least
    ``self.min_char`` long are written as ``query<TAB>frequency`` through
    ``rw.writeFile`` to ``<tar_dir>/<filename>``.  For each entry of
    ``self.targets`` contained in such a query, the counters in
    ``self.target_num_l`` and ``self.target_freq_l`` at the same position
    are increased.  The running totals ``self.total_num``,
    ``self.total_num_sorted``, ``self.total_freq`` and
    ``self.total_freq_sorted`` are updated as well.

    Returns:
        The textual summary for this file.  A ratio whose denominator is
        zero is reported as 0.000 instead of raising ZeroDivisionError.
    """
    def ratio(part, whole):
        # Guard the empty case; float() keeps true division on Python 2.
        if not whole:
            return 0.0
        return float(part) / whole

    src_path = self.src_dir + "/" + filename
    tar_path = self.tar_dir + "/" + filename
    query_list = rw.readFile(src_path).split("\n")

    # NOTE(review): this counts every newline-separated piece, including
    # blank ones and lines without a tab -- confirm that is intended.
    num = len(query_list)
    freq = 0
    freq_sorted = 0
    query_list_sorted = []

    for query in query_list:
        fields = query.split("\t")
        if len(fields) <= 1:
            continue
        text = fields[0]
        count_field = fields[-1]
        freq += int(count_field)
        if len(text) < self.min_char:
            continue
        query_list_sorted.append(text + "\t" + count_field)
        freq_sorted += int(count_field)
        # enumerate() gives each target its own slot, even when the same
        # target string is listed twice; a query counts once per target.
        for index, target in enumerate(self.targets):
            hit = int(target in text)
            self.target_num_l[index] += hit
            self.target_freq_l[index] += hit * int(count_field)

    num_sorted = len(query_list_sorted)
    self.total_num += num
    self.total_num_sorted += num_sorted
    self.total_freq += freq
    self.total_freq_sorted += freq_sorted

    result = ("Query Log: %s\n" % filename) \
        + ("Number of queries: %d\n" % num) \
        + ("Queries over %d bytes: %d\n" % (self.min_char, num_sorted)) \
        + ("Queries frequency: %d\n" % freq) \
        + ("Long queries frequency: %d\n" % freq_sorted) \
        + ("Long query ratio: %0.3f\n" % ratio(num_sorted, num)) \
        + ("Long query frequency ratio: %0.3f\n" % ratio(freq_sorted, freq))

    content_sorted = "".join(entry + "\n" for entry in query_list_sorted)
    rw.writeFile(tar_path, content_sorted)
    return result
def filter(self):
    """Select keys that reach both thresholds and write two model rankings.

    A key of ``self.dic_fvq`` is added to ``self.dic`` as
    ``(dic_fvq value, dic_num value)`` when its ``dic_fvq`` value is at
    least ``self.fvq_th``, its ``dic_num`` value is at least
    ``self.num_th`` and it is not in ``self.dic`` yet.  ``self.dic`` is then
    written, highest first, through ``rw.writeFile`` to
    ``<res_dir>/model_fvq.txt`` (ordered by the first tuple element) and
    ``<res_dir>/model_num.txt`` (ordered by the second).  The second
    ordering is also kept in ``self.dic_list``.
    """
    def render(rows):
        return "".join(query + '\t' + str(freq) + '\n' for query, freq in rows)

    for key in self.dic_fvq:
        fvq_value = self.dic_fvq[key]
        # The and-chain evaluates left to right, so dic_num is only looked
        # up for keys that already passed the fvq threshold.
        if (fvq_value >= self.fvq_th
                and self.dic_num[key] >= self.num_th
                and key not in self.dic):
            self.dic[key] = (fvq_value, self.dic_num[key])

    by_fvq = sorted(self.dic.items(), key=lambda pair: pair[1][0])
    # Ascending sort then reverse, so ties keep their former order.
    by_fvq.reverse()
    rw.writeFile(self.res_dir + '/model_fvq.txt', render(by_fvq))

    by_num = sorted(self.dic.items(), key=lambda pair: pair[1][1])
    by_num.reverse()
    self.dic_list = by_num
    rw.writeFile(self.res_dir + '/model_num.txt', render(by_num))
def sort_file(src_dir, res_dir, filename, cols, min_f):
    """Keep selected columns of the rows whose frequency reaches ``min_f``.

    Args:
        src_dir: directory that holds the input file.
        res_dir: directory that receives the output file.
        filename: file name used in both directories.
        cols: indexes of the tab-separated columns to keep, in output order;
            the last index names the column read as the integer frequency.
        min_f: minimum frequency a row needs to be kept.

    Returns:
        The text that was written through ``rw.writeFile`` to
        ``<res_dir>/<filename>``: one tab-joined line per kept row.
    """
    print("Pre-processing file: %s" % filename)
    src_path = src_dir + '/' + filename
    res_path = res_dir + '/' + filename

    # [0:-1] discards the piece that follows the last newline of the file.
    records = rw.readFile(src_path).split('\n')[0:-1]
    kept = []
    for record in records:
        fields = record.split("\t")
        freq = int(fields[cols[-1]])
        if freq >= min_f:
            kept.append('\t'.join(fields[col] for col in cols) + '\n')

    content_sorted = "".join(kept)
    rw.writeFile(res_path, content_sorted)
    return content_sorted
def reconn(self):
    """Reconnect to the responding peer, download the file and record it.

    Steps:
      1. Connect to the local host name on the port taken from the second
         ``'|'``-separated field of ``self.mid`` and send ``self.msg``.
      2. Parse the first received block (up to 1024 bytes) as JSON metadata,
         then read 1024-byte chunks until the peer closes the connection.
      3. Write the data through ``rw.writeFile`` to
         ``<cwd>/files/download/<self.name>``.
      4. Remove ``self.name`` from the list in ``req_list.txt`` and store the
         metadata under ``meta_dict['download'][self.name]`` in
         ``metadata.txt``.

    The socket is closed even when connecting, sending, receiving or JSON
    parsing raises; the exception itself still propagates to the caller.
    """
    self.s = socket.socket()
    try:
        self.host = socket.gethostname()
        self.port = int(self.mid.split('|')[1])
        self.s.connect((self.host, self.port))
        print('reconnect to ' + self.mid)
        # sendall() keeps writing until the whole request is out, whereas
        # send() may transmit only part of it.
        self.s.sendall(self.msg)

        # NOTE(review): assumes the metadata arrives complete and on its own
        # in the first recv() -- confirm against the sending peer.
        metadata = json.loads(self.s.recv(1024))

        # An empty chunk means the peer closed the connection.
        chunks = []
        while True:
            chunk = self.s.recv(1024)
            if len(chunk) == 0:
                break
            chunks.append(chunk)
    finally:
        self.s.close()

    content = ''.join(chunks)
    path = os.path.join(os.getcwd(), 'files', 'download', self.name)
    rw.writeFile(path, content)

    # Drop the finished request.  A name that is already missing must not
    # abort the metadata update of a file that was saved successfully.
    name_list = rw.readList('req_list.txt')
    if self.name in name_list:
        name_list.remove(self.name)
    rw.write('req_list.txt', name_list)

    # Record the metadata, creating the 'download' section on first use.
    meta_dict = rw.readDict('metadata.txt')
    meta_dict.setdefault('download', {})[self.name] = metadata
    rw.write('metadata.txt', meta_dict)
    print('receive file: ' + self.name)
def filter(self):
    """Ask the user to accept or reject every model and save the verdicts.

    Each entry of ``self.models`` is shown as a ``raw_input`` prompt until
    the answer is ``'y'`` or ``'n'``.  ``'y'`` increments ``self.success``,
    ``'n'`` increments ``self.failure``; the model is then stored in
    ``self.dic`` as ``(position, 1 or 0, self.success, self.failure)``.
    Finally ``self.dic`` is written in ascending position order as
    ``model<TAB>str(tuple)`` lines through ``rw.writeFile`` to
    ``self.res_dir``.
    """
    for position, model in enumerate(self.models):
        # Repeat the prompt until a valid answer is given.
        while True:
            answer = raw_input(model + ': ')
            if answer == 'y':
                self.success += 1
                accepted = 1
                break
            if answer == 'n':
                self.failure += 1
                accepted = 0
                break
            print("Not a valid response!")
        self.dic[model] = (position, accepted, self.success, self.failure)
    print("Filtering complete!")

    ordered = sorted(self.dic.items(), key=lambda pair: pair[1][0])
    rows = [query + '\t' + str(score) + '\n' for query, score in ordered]
    rw.writeFile(self.res_dir, "".join(rows))
def filter(self):
    """Collect a yes/no verdict for each model interactively and store them.

    For every entry of ``self.models`` the user is prompted with
    ``raw_input`` until ``'y'`` or ``'n'`` is entered.  ``self.success`` or
    ``self.failure`` is incremented accordingly and ``self.dic[model]``
    becomes ``(position, 1 or 0, self.success, self.failure)``.  The
    contents of ``self.dic``, ordered by position, are written as
    ``model<TAB>str(tuple)`` lines via ``rw.writeFile`` to ``self.res_dir``.
    """
    for index, model in enumerate(self.models):
        verdict = None
        # Any answer other than 'y' or 'n' repeats the question.
        while verdict is None:
            reply = raw_input(model + ': ')
            if reply == 'y':
                self.success += 1
                verdict = 1
            elif reply == 'n':
                self.failure += 1
                verdict = 0
            else:
                print("Not a valid response!")
        self.dic[model] = (index, verdict, self.success, self.failure)
    print("Filtering complete!")

    by_position = sorted(self.dic.items(), key=lambda entry: entry[1][0])
    text = "".join(query + '\t' + str(score) + '\n' for query, score in by_position)
    rw.writeFile(self.res_dir, text)