Exemple #1
0
    def classification(self):
        """Build the per-category dictionaries and write two rankings for each.

        For every category in ``self.r1.type_dic``, the keys of that category
        that also occur in ``self.dic`` are stored in
        ``self.type_dic[category]`` as ``(r1 value, r2 value)`` pairs. The
        pairs are then written twice via ``rw.writeFile``: ranked by the r1
        value to ``<self.fvq_dir>/<category>.txt`` and ranked by the r2 value
        to ``<self.num_dir>/<category>.txt``. Each output line is
        ``query<TAB>str((r1 value, r2 value))``.

        Raises:
            KeyError: if a category or key of ``self.r1`` is missing from
                ``self.r2.type_dic``.
        """
        for category in self.r1.type_dic:
            dic_fvq = self.r1.type_dic[category]
            dic_num = self.r2.type_dic[category]

            # NOTE: a minimum-share filter based on self.min_ratio1 /
            # self.min_ratio2 used to be applied here but was disabled; the
            # thresholds it needed are therefore no longer computed.
            d = {}
            for key in dic_fvq:
                if key in self.dic:
                    d[key] = (dic_fvq[key], dic_num[key])
            self.type_dic[category] = d

            # Column 0 of each pair ranks the fvq file, column 1 the num file.
            for column, out_dir in ((0, self.fvq_dir), (1, self.num_dir)):
                # Ascending sort then reversal: descending order, with ties
                # in the same order the original slicing produced.
                ranked = sorted(
                    d.items(), key=lambda item: item[1][column])[::-1]
                path = out_dir + '/' + category + '.txt'
                content = "".join(
                    [query + '\t' + str(freq) + '\n' for query, freq in ranked])
                rw.writeFile(path, content)
Exemple #2
0
    def sort_files(self):
        """Process every matching log file and write the combined report.

        Each file in ``self.src_dir`` whose name ends with ``self.suffix`` is
        passed to ``self.sort_file``; the per-file summaries it returns are
        followed by an overall summary built from the ``self.total_*``
        counters and by one section per entry of ``self.targets``. The whole
        text is written to ``<self.result_dir>/result.txt`` via
        ``rw.writeFile``.
        """
        def _ratio(part, whole):
            # With no matching files (or no long queries) the totals stay at
            # 0; report a ratio of 0.0 instead of raising ZeroDivisionError.
            return float(part) / whole if whole else 0.0

        result_path = (self.result_dir + "/result.txt")
        sections = []
        for filename in os.listdir(self.src_dir):
            if filename.endswith(self.suffix):
                sections.append(self.sort_file(filename) + "\n")

        sections.append(
            ("Queries total number: %d\n" % self.total_num)
            + ("Queries over %d bytes in total: %d\n"
               % (self.min_char, self.total_num_sorted))
            + ("Queries total frequency: %d\n" % self.total_freq)
            + ("Long queries total frequency: %d\n" % self.total_freq_sorted)
            + ("Long query ratio: %0.3f\n"
               % _ratio(self.total_num_sorted, self.total_num))
            + ("Long query frequency ratio: %0.3f\n"
               % _ratio(self.total_freq_sorted, self.total_freq))
        )
        sections.append("\n\n")

        for i, target in enumerate(self.targets):
            sections.append(
                ("Long Queries: %d\n" % self.total_num_sorted)
                + ("Number of '%s': %d\n" % (target, self.target_num_l[i]))
                + ("Long Queries frequency: %d\n" % self.total_freq_sorted)
                + ("Frequency of '%s': %d\n" % (target, self.target_freq_l[i]))
                + ("Ratio: %0.3f\n"
                   % _ratio(self.target_num_l[i], self.total_num_sorted))
                + ("Frequency Ratio: %0.3f\n"
                   % _ratio(self.target_freq_l[i], self.total_freq_sorted))
                + "\n"
            )
        rw.writeFile(result_path, "".join(sections))
Exemple #3
0
    def rank(self):
        """Select the leading dictionary words of the sample and save them.

        The dictionary file ``self.dict_dir`` contributes the first
        tab-separated field of each line; the sample file ``self.res_dir``
        holds ``word:count`` lines. Sample lines whose word is in the
        dictionary are kept in file order, and the first
        ``round(len(kept) * self.ratio)`` of them are recorded in
        ``self.dic_word`` (word -> int count). Their words are written one
        per line to ``self.rank_dir``. The last ``split("\\n")`` element of
        both files is discarded.
        """
        dict_lines = rw.readFile(self.dict_dir).split("\n")[0:-1]
        sample_lines = rw.readFile(self.res_dir).split("\n")[0:-1]

        known = set([entry.split("\t")[0] for entry in dict_lines])
        matched = [row for row in sample_lines if row.split(":")[0] in known]

        limit = int(round(len(matched) * self.ratio))
        selected = []
        for row in matched[:limit]:
            fields = row.split(':')
            selected.append(fields[0] + '\n')
            self.dic_word[fields[0]] = int(fields[1])

        rw.writeFile(self.rank_dir, "".join(selected))
Exemple #4
0
 def init_result(self):
     """Write the analysed word counts, highest first, and return the text.

     The mapping returned by ``self.analyze()`` is ordered by value in
     descending order; entries whose value is at least ``self.min_num`` are
     rendered as ``word:count`` lines. The text is written to
     ``self.res_dir`` via ``rw.writeFile`` and also returned.
     """
     # Ascending sort followed by a reversal gives the descending order.
     ranked = sorted(self.analyze().items(), key=lambda pair: pair[1])
     ranked.reverse()
     kept = [word + ':' + str(num) + '\n'
             for word, num in ranked if num >= self.min_num]
     content = "".join(kept)
     rw.writeFile(self.res_dir, content)
     return content
Exemple #5
0
 def update(self):
     """Merge the scores into the counts and dump them to General.txt.

     ``self.dic_num`` is updated in place with ``self.dic_score`` and then
     also bound to ``self.dic`` (same object, not a copy). Its items are
     written to ``<self.res_dir>/General.txt`` as ``query<TAB>value`` lines
     in descending order of value.
     """
     self.dic_num.update(self.dic_score)
     self.dic = self.dic_num
     out_path = self.res_dir + '/General.txt'
     # Ascending sort followed by a reversal gives the descending order.
     ranked = sorted(self.dic_num.items(), key=lambda pair: pair[1])
     ranked.reverse()
     rows = [query + '\t' + str(num) + '\n' for query, num in ranked]
     rw.writeFile(out_path, "".join(rows))
Exemple #6
0
 def update(self):
     """Fold ``self.dic_score`` into ``self.dic_num`` and write General.txt.

     After the in-place merge, ``self.dic`` refers to the same dictionary as
     ``self.dic_num``. Every entry is written to
     ``<self.res_dir>/General.txt`` as ``query<TAB>value``, highest value
     first.
     """
     self.dic_num.update(self.dic_score)
     self.dic = self.dic_num
     target = self.res_dir + '/General.txt'
     # reversed() over the ascending sort yields the descending order.
     entries = reversed(
         sorted(self.dic_num.items(), key=lambda entry: entry[1]))
     text = "".join(
         [query + '\t' + str(num) + '\n' for query, num in entries])
     rw.writeFile(target, text)
Exemple #7
0
 def rank(self):
     """Rank the queries of the active mode, write them out and return them.

     ``self.mode`` selects the source mapping: ``"sqv"`` uses
     ``self.addToDict_fvq()`` and ``"num"`` uses ``self.addToDict_num()``.
     The items are ordered by value, highest first, rendered as
     ``query<TAB>value`` lines, written to ``self.res_dir`` via
     ``rw.writeFile`` and returned as one string.

     Raises:
         ValueError: if ``self.mode`` is neither ``"sqv"`` nor ``"num"``.
             Previously this case failed with an UnboundLocalError.
     """
     if self.mode == "sqv":
         table = self.addToDict_fvq()
     elif self.mode == "num":
         table = self.addToDict_num()
     else:
         raise ValueError("unknown rank mode: %r" % (self.mode,))

     # Ascending sort then reversal keeps the original ordering of ties.
     ranked = sorted(table.items(), key=lambda pair: pair[1])[::-1]
     content = "".join(
         [query + '\t' + str(freq) + '\n' for query, freq in ranked])
     rw.writeFile(self.res_dir, content)
     return content
 def filter(self,dic,filename):
     """Score every key of ``dic`` and write the ranking to a text file.

     Each key is mapped to ``self.evaluate(key, dic)``. The pairs are ordered
     by the first element of that score, highest first, and written as
     ``query<TAB>str(score)`` lines to ``<self.res_dir>/<filename>.txt``.

     Returns:
         A tuple ``(scores, ranked)``: the key -> score dictionary and the
         ordered list of ``(key, score)`` pairs.
     """
     scores = dict([(key, self.evaluate(key, dic)) for key in dic])
     out_path = self.res_dir + '/' + filename + '.txt'
     # Ascending sort followed by a reversal gives the descending order.
     ranked = sorted(scores.items(), key=lambda pair: pair[1][0])
     ranked.reverse()
     rows = [query + '\t' + str(score) + '\n' for query, score in ranked]
     rw.writeFile(out_path, "".join(rows))
     return (scores, ranked)
Exemple #9
0
 def filter(self, dic, filename):
     """Evaluate all keys of ``dic`` and persist them ordered by score.

     The score of a key is whatever ``self.evaluate(key, dic)`` returns; its
     first element drives the descending order. Lines of the form
     ``query<TAB>str(score)`` are written to
     ``<self.res_dir>/<filename>.txt`` via ``rw.writeFile``.

     Returns:
         ``(evaluated, ordered)`` -- the dictionary of scores and the list of
         ``(key, score)`` pairs in output order.
     """
     evaluated = {}
     for name in dic:
         evaluated[name] = self.evaluate(name, dic)

     destination = self.res_dir + '/' + filename + '.txt'
     # list(reversed(...)) over the ascending sort yields descending order.
     ordered = list(reversed(
         sorted(evaluated.items(), key=lambda entry: entry[1][0])))
     pieces = []
     for query, score in ordered:
         pieces.append(query + '\t' + str(score) + '\n')
     rw.writeFile(destination, "".join(pieces))
     return (evaluated, ordered)
Exemple #10
0
 def rank(self):
     """Write the ranking for the configured mode and return its text.

     With ``self.mode == "sqv"`` the mapping comes from
     ``self.addToDict_fvq()``; with ``"num"`` it comes from
     ``self.addToDict_num()``. Items are sorted by value in descending order
     and rendered as ``query<TAB>value`` lines; the text is written to
     ``self.res_dir`` via ``rw.writeFile`` and returned.

     Raises:
         ValueError: if ``self.mode`` is not one of the two supported
             values. Previously this surfaced as an UnboundLocalError.
     """
     if self.mode == "sqv":
         source = self.addToDict_fvq()
     elif self.mode == "num":
         source = self.addToDict_num()
     else:
         raise ValueError("unknown rank mode: %r" % (self.mode,))

     # Ascending sort then reversal keeps the original ordering of ties.
     result = sorted(source.items(), key=lambda item: item[1])[::-1]
     content = "".join(
         [query + '\t' + str(freq) + '\n' for query, freq in result])
     rw.writeFile(self.res_dir, content)
     return content
Exemple #11
0
 def find_NE(self, filename):
     """Mark dictionary keys in one file and keep only the marked lines.

     The text of ``<self.src_dir>/<filename>`` is passed through
     ``self.find_key`` once per key of ``self.dic``. Every line that then
     contains a non-empty ``[...]`` group is written to
     ``<self.res_dir>/<filename>`` and returned as one string.
     """
     src_path = self.src_dir + '/' + filename
     res_path = self.res_dir + '/' + filename
     txt = rw.readFile(src_path)
     for key in self.dic:
         txt = self.find_key(key, txt)
     # Raw string: "\[" and "\]" are regex escapes, not string escapes, and
     # a non-raw literal triggers an invalid-escape warning on newer Pythons.
     bracketed = re.compile(r'.*\[.+\].*')
     content = "".join([line + '\n' for line in bracketed.findall(txt)])
     rw.writeFile(res_path, content)
     return content
Exemple #12
0
    def classification(self):
        """Group ``self.dic_num`` by category and write one file per category.

        For each category of ``self.m.type_dic``, the entries of
        ``self.dic_num`` whose key contains the category are stored in
        ``self.type_dic[category]`` and written, highest value first, as
        ``query<TAB>value`` lines to ``<self.res_dir>/<category>.txt``.
        """
        for category in self.m.type_dic:
            members = {}
            for key in self.dic_num:
                if key in members:
                    continue
                if category not in key:
                    continue
                members[key] = self.dic_num[key]
            self.type_dic[category] = members

            # Ascending sort followed by a reversal gives descending order.
            ranked = sorted(members.items(), key=lambda pair: pair[1])
            ranked.reverse()
            path = self.res_dir + '/' + category + '.txt'
            rows = [query + '\t' + str(num) + '\n' for query, num in ranked]
            rw.writeFile(path, "".join(rows))
Exemple #13
0
 def sync(self):
     """Render all queries, mark dictionary keys and keep the marked lines.

     ``self.query_dic`` is rendered as ``query<TAB>value`` lines, highest
     value first, and passed through ``self.find_key`` once per key of
     ``self.dic``. Lines that then contain a non-empty ``[...]`` group are
     written to ``self.total_dir`` via ``rw.writeFile`` and returned as one
     string.
     """
     # Ascending sort then reversal keeps the original ordering of ties.
     ranked = sorted(self.query_dic.items(), key=lambda pair: pair[1])[::-1]
     content = "".join(
         [query + '\t' + str(freq) + '\n' for query, freq in ranked])
     for key in self.dic:
         content = self.find_key(key, content)
     # Raw string: "\[" and "\]" are regex escapes, not string escapes, and
     # a non-raw literal triggers an invalid-escape warning on newer Pythons.
     bracketed = re.compile(r'.*\[.+\].*')
     txt = "".join([line + '\n' for line in bracketed.findall(content)])
     rw.writeFile(self.total_dir, txt)
     return txt
Exemple #14
0
    def classification(self):
        """Write, per category, the ``self.dic_num`` entries that mention it.

        Categories are the keys of ``self.m.type_dic``. An entry belongs to a
        category when the category is contained in its key. The selection is
        kept in ``self.type_dic[category]`` and saved to
        ``<self.res_dir>/<category>.txt`` as ``query<TAB>value`` lines in
        descending order of value.
        """
        for category in self.m.type_dic:
            selected = {}
            for name in self.dic_num:
                already_taken = name in selected
                if not already_taken and (category in name):
                    selected[name] = self.dic_num[name]
            self.type_dic[category] = selected

            # reversed() over the ascending sort yields descending order.
            ordered = reversed(
                sorted(selected.items(), key=lambda entry: entry[1]))
            path = self.res_dir + '/' + category + '.txt'
            pieces = []
            for query, num in ordered:
                pieces.append(query + '\t' + str(num) + '\n')
            rw.writeFile(path, "".join(pieces))
Exemple #15
0
 def segment(self,filename):
     """Segment the leading queries of one log file and save the results.

     The first ``self.num`` lines of ``<self.src_dir>/<filename>`` are split
     on tabs; the first field is passed to ``self.run``. When the result is
     not the empty string, a line ``query<TAB>segmented<TAB>second field`` is
     added to the output, which is written to ``<self.res_dir>/<filename>``.
     Progress is printed to stdout.
     """
     print("********************")
     print("Segmenting File: %s" % filename)
     src_path = self.src_dir + '/' + filename
     res_path = self.res_dir + '/' + filename
     rows = []
     for entry in rw.readFile(src_path).split('\n')[0:self.num]:
         fields = entry.split('\t')
         segmented = self.run(fields[0])
         if segmented != "":
             print("Segmenting: %s" % fields[0])
             rows.append(
                 fields[0] + '\t' + segmented + '\t' + fields[1] + '\n')
     rw.writeFile(res_path, "".join(rows))
Exemple #16
0
 def classification(self, nef):
     """Write one ranking file per category of ``nef.dic``.

     For each category, the entries of ``self.dic`` whose key contains the
     category are collected. The collection is stored in
     ``self.type_dic[category]`` only if that category is not present yet,
     and is always written to ``<self.class_dir>/<category>.txt`` as
     ``query<TAB>value`` lines in descending order of value.
     """
     for category in nef.dic:
         path = self.class_dir + '/' + category + '.txt'
         members = {}
         for key in self.dic:
             if category not in key:
                 continue
             if key not in members:
                 members[key] = self.dic[key]
         if category not in self.type_dic:
             self.type_dic[category] = members

         # Ascending sort followed by a reversal gives descending order.
         ranked = sorted(members.items(), key=lambda pair: pair[1])
         ranked.reverse()
         rows = [query + '\t' + str(freq) + '\n' for query, freq in ranked]
         rw.writeFile(path, "".join(rows))
Exemple #17
0
 def classification(self, nef):
     """Split ``self.dic`` by the categories of ``nef.dic`` and save each.

     An entry is assigned to a category when the category occurs in its key.
     ``self.type_dic`` receives the selection only for categories it does not
     already contain. The selection is written, highest value first, as
     ``query<TAB>value`` lines to ``<self.class_dir>/<category>.txt``.
     """
     for category in nef.dic:
         path = self.class_dir + '/' + category + '.txt'
         selected = {}
         for name in self.dic:
             if (category in name) and (name not in selected):
                 selected[name] = self.dic[name]
         if category not in self.type_dic:
             self.type_dic[category] = selected

         # reversed() over the ascending sort yields descending order.
         ordered = reversed(
             sorted(selected.items(), key=lambda entry: entry[1]))
         pieces = []
         for query, freq in ordered:
             pieces.append(query + '\t' + str(freq) + '\n')
         rw.writeFile(path, "".join(pieces))
Exemple #18
0
	def __init__(self, pid, neighbor):
		"""Set up a peer and start its server, push-update and client parts.

		Stores ``pid`` and ``neighbor`` on the instance, overwrites
		'msg_list.txt' and 'req_list.txt' with empty content, then creates
		and starts ``server.Server``, ``pushupdate.Pushupdate`` and
		``client.Client`` in that order, each with ``(pid, neighbor)``.
		"""
		# Peer id and its neighbor.
		self.pid = pid
		self.neighbor = neighbor
		# Reset the message list and request list files before anything runs.
		for list_file in ('msg_list.txt', 'req_list.txt'):
			rw.writeFile(list_file, '')
		# Server (details in the server.py module).
		server.Server(self.pid, self.neighbor).start()
		# Push-update (details in the pushupdate.py module).
		pushupdate.Pushupdate(self.pid, self.neighbor).start()
		# Client (details in the client.py module).
		client.Client(self.pid, self.neighbor).start()
 def sub(self,filename):
     """Substitute the leading queries of one log file and save the results.

     The text of ``<self.src_dir>/<filename>`` is first passed through
     ``pre.sort_txt(text, [0, 1], 20)``. The first ``self.num`` lines of the
     result are split on tabs and the first field is passed to ``self.run``.
     When the result is not the empty string, a line
     ``query<TAB>substituted<TAB>second field`` is added to the output, which
     is written to ``<self.res_dir>/<filename>``. Progress is printed to
     stdout.
     """
     print("********************")
     print("Substituting File: %s" % filename)
     src_path = self.src_dir + '/' + filename
     res_path = self.res_dir + '/' + filename
     txt = pre.sort_txt(rw.readFile(src_path),[0,1],20)
     rows = []
     for entry in txt.split('\n')[0:self.num]:
         fields = entry.split('\t')
         substituted = self.run(fields[0])
         if substituted != "":
             print("Substituting: %s" % fields[0])
             rows.append(
                 fields[0] + '\t' + substituted + '\t' + fields[1] + '\n')
     rw.writeFile(res_path, "".join(rows))
Exemple #20
0
 def sub(self, filename):
     """Run the substitution over one pre-sorted log file.

     Reads ``<self.src_dir>/<filename>``, passes it through
     ``pre.sort_txt(text, [0, 1], 20)`` and processes the first ``self.num``
     lines. For each line the first tab-separated field is given to
     ``self.run``; non-empty results are emitted as
     ``query<TAB>substituted<TAB>second field``. The output goes to
     ``<self.res_dir>/<filename>`` and progress is printed to stdout.
     """
     print("********************")
     print("Substituting File: %s" % filename)
     src_path = self.src_dir + '/' + filename
     res_path = self.res_dir + '/' + filename
     prepared = pre.sort_txt(rw.readFile(src_path), [0, 1], 20)
     query_log = prepared.split('\n')[0:self.num]
     pieces = []
     for record in query_log:
         columns = record.split('\t')
         replacement = self.run(columns[0])
         if replacement != "":
             print("Substituting: %s" % columns[0])
             pieces.append('\t'.join(
                 [columns[0], replacement, columns[1]]) + '\n')
     rw.writeFile(res_path, "".join(pieces))
Exemple #21
0
    def sort_file(self,filename):
        """Extract the long queries of one log file and summarise it.

        Lines of ``<self.src_dir>/<filename>`` are split on tabs; the last
        field is read as the frequency. Queries whose first field has at
        least ``self.min_char`` characters are written as
        ``query<TAB>frequency`` to ``<self.tar_dir>/<filename>``. For each of
        those, every entry of ``self.targets`` contained in the query
        increments its slot in ``self.target_num_l`` by one and in
        ``self.target_freq_l`` by the frequency. The ``self.total_*``
        counters are increased by this file's figures.

        Returns:
            A multi-line text summary of this file.

        Raises:
            ValueError: if the last field of a counted line is not an
                integer.
        """
        def _ratio(part, whole):
            # A file without any frequency column has a total of 0; report
            # 0.0 instead of raising ZeroDivisionError.
            return float(part) / whole if whole else 0.0

        src_path = self.src_dir + "/" + filename
        tar_path = self.tar_dir + "/" + filename
        query_list = rw.readFile(src_path).split("\n")

        num = len(query_list)
        freq = 0
        freq_sorted = 0
        query_list_sorted = []
        # Keep the queries of at least min_char characters.
        for query in query_list:
            temp = query.split("\t")
            if len(temp) > 1:
                freq += int(temp[-1])
            if len(temp[0]) >= self.min_char:
                # NOTE(review): for a line without a tab, temp[-1] is the
                # query itself, so int() raises unless it is numeric.
                query_freq = int(temp[-1])
                query_list_sorted.append(temp[0] + "\t" + temp[-1])
                freq_sorted += query_freq
                # enumerate keeps each target on its own slot, also when the
                # same target string is listed twice.
                for index, target in enumerate(self.targets):
                    if target in temp[0]:
                        self.target_num_l[index] += 1
                        self.target_freq_l[index] += query_freq

        num_sorted = len(query_list_sorted)
        self.total_num += num
        self.total_num_sorted += num_sorted
        self.total_freq += freq
        self.total_freq_sorted += freq_sorted

        result = ("Query Log: %s\n" % filename) \
               + ("Number of queries: %d\n" % num) \
               + ("Queries over %d bytes: %d\n" % (self.min_char,num_sorted)) \
               + ("Queries frequency: %d\n" % freq) \
               + ("Long queries frequency: %d\n" % freq_sorted) \
               + (("Long query ratio: %0.3f\n") % _ratio(num_sorted, num)) \
               + (("Long query frequency ratio: %0.3f\n") % _ratio(freq_sorted, freq))

        # Recombine the kept queries, one per line.
        content_sorted = "".join([query + "\n" for query in query_list_sorted])

        rw.writeFile(tar_path,content_sorted)
        return result
Exemple #22
0
    def filter(self):
        """Admit keys that pass both thresholds and write two model rankings.

        A key of ``self.dic_fvq`` is added to ``self.dic`` as
        ``(fvq value, num value)`` when its fvq value is at least
        ``self.fvq_th``, its num value is at least ``self.num_th`` and it is
        not in ``self.dic`` yet. ``self.dic`` is then written twice as
        ``query<TAB>str(pair)`` lines: ordered by the fvq value to
        ``<self.res_dir>/model_fvq.txt`` and by the num value to
        ``<self.res_dir>/model_num.txt``, both descending. The second
        ordering is also stored in ``self.dic_list``.
        """
        for key in self.dic_fvq:
            passes = (self.dic_fvq[key] >= self.fvq_th) and \
                     (self.dic_num[key] >= self.num_th)
            if passes and key not in self.dic:
                self.dic[key] = (self.dic_fvq[key], self.dic_num[key])

        # Ascending sort followed by a reversal gives descending order.
        by_fvq = sorted(self.dic.items(), key=lambda pair: pair[1][0])
        by_fvq.reverse()
        fvq_path = self.res_dir + '/model_fvq.txt'
        rw.writeFile(fvq_path, "".join(
            [query + '\t' + str(freq) + '\n' for query, freq in by_fvq]))

        by_num = sorted(self.dic.items(), key=lambda pair: pair[1][1])
        by_num.reverse()
        self.dic_list = by_num
        num_path = self.res_dir + '/model_num.txt'
        rw.writeFile(num_path, "".join(
            [query + '\t' + str(freq) + '\n' for query, freq in by_num]))
Exemple #23
0
def sort_file(src_dir, res_dir, filename, cols, min_f):
    """Keep the selected columns of sufficiently frequent lines of a file.

    Reads ``<src_dir>/<filename>``, drops the last ``split('\\n')`` element
    and splits every remaining line on tabs. The column ``cols[-1]`` is read
    as an integer frequency; lines whose frequency is at least ``min_f`` are
    reduced to the columns listed in ``cols`` (in that order, tab-joined).
    The result is written to ``<res_dir>/<filename>`` and returned.
    """
    print("Pre-processing file: %s" % filename)
    src_path = src_dir + '/' + filename
    res_path = res_dir + '/' + filename
    rows = rw.readFile(src_path).split('\n')[0:-1]

    kept = []
    for row in rows:
        fields = row.split("\t")
        freq = int(fields[cols[-1]])
        if freq >= min_f:
            kept.append('\t'.join([fields[col] for col in cols]) + '\n')

    content_sorted = "".join(kept)
    rw.writeFile(res_path, content_sorted)
    return content_sorted
Exemple #24
0
	def reconn(self):
		"""Reconnect to the responding peer, download the file and record it.

		The port is the second '|'-separated field of ``self.mid``; the host
		is the local host name. After sending ``self.msg``, the first
		``recv`` is parsed as JSON metadata and everything received until
		the peer closes the connection is the file content. The content is
		written to ``<cwd>/files/download/<self.name>``, ``self.name`` is
		removed from the list in 'req_list.txt', and the metadata is stored
		under ``meta_dict['download'][self.name]`` in 'metadata.txt'.

		Raises:
			ValueError: if ``self.name`` is not in the request list.
		"""
		# Reconnect to the response peer.
		self.s = socket.socket()
		chunks = []
		try:
			self.host = socket.gethostname()
			self.port = int(self.mid.split('|')[1])

			self.s.connect((self.host, self.port))
			print('reconnect to ' + self.mid)
			self.s.send(self.msg)
			# NOTE(review): this assumes one recv(1024) returns exactly the
			# JSON metadata; TCP does not preserve message boundaries.
			metadata = json.loads(self.s.recv(1024))
			# Download the file by chunks until the peer closes the socket.
			while True:
				chunk = self.s.recv(1024)
				if len(chunk) == 0:
					break
				chunks.append(chunk)
		finally:
			# Close the socket on failure as well, so it is never leaked.
			self.s.close()
		content = ''.join(chunks)
		path = os.path.join(os.getcwd(), 'files', 'download', self.name)
		rw.writeFile(path, content)
		# Remove the finished download from the request list.
		name_list = rw.readList('req_list.txt')
		name_list.remove(self.name)
		rw.write('req_list.txt', name_list)
		# Record the metadata, creating the 'download' section if needed.
		meta_dict = rw.readDict('metadata.txt')
		meta_dict.setdefault('download', {})[self.name] = metadata
		rw.write('metadata.txt', meta_dict)
		print('receive file: ' + self.name)
Exemple #25
0
 def filter(self):
     """Ask for a y/n verdict on every model and write the results.

     Each entry of ``self.models`` is prompted as ``<model>: `` until the
     answer is 'y' or 'n'. 'y' increments ``self.success`` and 'n'
     increments ``self.failure``; ``self.dic[model]`` then becomes
     ``(index, 1 or 0, self.success, self.failure)``. Finally ``self.dic``
     is written to ``self.res_dir`` as ``query<TAB>str(tuple)`` lines in
     ascending order of the stored index.
     """
     verdicts = {'y': 1, 'n': 0}
     i = 0
     while i < len(self.models):
         model = self.models[i]
         ans = raw_input(model + ': ')
         if ans not in verdicts:
             # Same model is asked again: the index is not advanced.
             print("Not a valid response!")
             continue
         if ans == 'y':
             self.success += 1
         else:
             self.failure += 1
         self.dic[model] = (i, verdicts[ans], self.success, self.failure)
         i += 1
     print("Filtering complete!")
     ordered = sorted(self.dic.items(), key=lambda pair: pair[1][0])
     rows = [query + '\t' + str(score) + '\n' for query, score in ordered]
     rw.writeFile(self.res_dir, "".join(rows))
 def filter(self):
     """Collect an interactive y/n decision per model and save them.

     The prompt ``<model>: `` is repeated for the same model until 'y' or
     'n' is entered. A 'y' raises ``self.success`` by one, an 'n' raises
     ``self.failure`` by one, and the model is recorded in ``self.dic`` as
     ``(index, 1 for 'y' / 0 for 'n', self.success, self.failure)``. The
     contents of ``self.dic`` are written to ``self.res_dir``, ordered by
     index ascending, one ``query<TAB>str(tuple)`` line each.
     """
     position = 0
     while position < len(self.models):
         model = self.models[position]
         prompt = model + ': '
         answer = raw_input(prompt)
         accepted = (answer == 'y')
         if not accepted and answer != 'n':
             # Anything else is rejected and the same model is asked again.
             print("Not a valid response!")
             continue
         if accepted:
             self.success += 1
             flag = 1
         else:
             self.failure += 1
             flag = 0
         self.dic[model] = (position,flag,self.success,self.failure)
         position += 1
     print("Filtering complete!")
     pieces = []
     for query, score in sorted(self.dic.items(),
                                key=lambda entry: entry[1][0]):
         pieces.append(query + '\t' + str(score) + '\n')
     rw.writeFile(self.res_dir, "".join(pieces))