def search_signatures(indexname, level_filenames, feature_filenames):
    import pickle
    import correlate
    import smooth_avg
    import signature_read
    import signature_make

    # Read all level signatures up front so each query can be correlated
    # against any candidate by index.
    level_signatures = []
    for level_filename, feature_filename in zip(level_filenames,
                                                feature_filenames):
        level_signature = signature_read.read_signature_short(level_filename)
        level_signatures.append(level_signature)

    data = pickle.loads(open(indexname, "r").read())

    search_results = []
    for level_filename, feature_filename in zip(level_filenames,
                                                feature_filenames):
        level_signature = signature_read.read_signature_short(level_filename)
        feature_signature = signature_read.read_signature_byte(
            feature_filename)
        signature = signature_make.signature_make(level_signature,
                                                  feature_signature)
        search_result = signature_make.signature_search_intersect(
            data, signature)
        # Annotate each hit with the Pearson correlation of the smoothed
        # query signature against the smoothed candidate signature.
        search_result = [
            (i, compare,
             correlate.pearson(smooth_avg.avg_smooth(level_signature),
                               smooth_avg.avg_smooth(level_signatures[i])))
            for i, compare in search_result
        ]
        search_results.append(search_result)
    return search_results
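# The correlate and smooth_avg modules used above are not part of this
# file. The sketches below are hypothetical stand-ins showing only what
# search_signatures assumes of them: pearson() takes two equal-length
# numeric sequences, avg_smooth() returns a smoothed copy of a sequence
# (the window size here is an assumption).
def pearson(xs, ys):
    # Pearson correlation r = cov(x, y) / (std(x) * std(y)).
    n = float(len(xs))
    mean_x = sum(xs) / n
    mean_y = sum(ys) / n
    cov = sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, ys))
    var_x = sum((x - mean_x) ** 2 for x in xs)
    var_y = sum((y - mean_y) ** 2 for y in ys)
    if var_x == 0 or var_y == 0:
        return 0.0  # a constant series has no defined correlation
    return cov / ((var_x * var_y) ** 0.5)


def avg_smooth(values, window=5):
    # Centered moving average over a fixed window.
    smoothed = []
    for i in xrange(len(values)):
        lo = max(0, i - window // 2)
        hi = min(len(values), i + window // 2 + 1)
        smoothed.append(float(sum(values[lo:hi])) / (hi - lo))
    return smoothed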
def main():
    import sys
    import pickle
    from collections import deque
    import correlate

    pattern_length = 100
    pattern_step = 50
    indexname = sys.argv[1]
    filename = sys.argv[2]

    patterns = pickle.loads(open(indexname, "r").read())

    values = []
    for line in open(filename, "r").readlines():
        values.append(int(line.strip()))

    # Slide a window of pattern_length samples over the input and, every
    # pattern_step samples, rank all patterns by Pearson correlation with
    # the current window; keep the index of the best match.
    result = []
    queue = deque()
    for i, v in enumerate(values):
        if len(queue) >= pattern_length:
            queue.popleft()
        queue.append(v)
        if len(queue) >= pattern_length and ((i + 1) % pattern_step) == 0:
            rank = []
            for pindex, pattern in enumerate(patterns):
                rank.append((correlate.pearson(pattern, queue), pindex))
            rank.sort(key=lambda x: -x[0])
            result.append(str(rank[0][1]))
    print "_".join(result)
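# Invocation sketch (script and file names are illustrative): given a
# pickled list of reference patterns and a file with one integer sample
# per line, the script prints the best-matching pattern index at every
# 50-sample step, joined with "_", e.g. output like "17_17_3_9":
#
#   python match_window.py patterns.pickle samples.txt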
def create_graph_correlation(indexfile, filelist):
    import pickle
    import correlate

    index = pickle.loads(open(indexfile, "r").read())
    db, filenames, filevalues = index

    # Build a dense graph: for every ordered pair of distinct files, add
    # an edge weighted by the Pearson correlation of their value series.
    graph = []
    for index1, filename1 in enumerate(filenames):
        edges = []
        for index2, filename2 in enumerate(filenames):
            if filename1 != filename2:
                correlation = correlate.pearson(filevalues[index1],
                                                filevalues[index2])
                edges.append((index2, correlation))
        graph.append(edges)

    data = (graph, [filename for filename, filesize in filenames])
    print pickle.dumps(data)
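# The pickled index consumed above is assumed to be the 3-tuple
# (db, filenames, filevalues): filenames is a list of (filename, filesize)
# pairs and filevalues[i] is the numeric series for filenames[i]. The
# function prints a pickled (graph, names) pair, where graph[i] is the
# list of (j, pearson_ij) edges from file i to every other file.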
def main():
    import sys
    import correlate

    listfile = sys.argv[1]
    listfilea = sys.argv[2]
    files = load_listfile(listfile)
    filesa = load_listfile(listfilea)

    # data: one frozenset of shifted values per file (for set overlap);
    # dataa: the same values as float lists (for Pearson correlation).
    data = []
    dataa = []
    for filename in files:
        values = []
        for line in open(filename, "r").readlines():
            vs = line.strip().split()
            value = int(vs[1]) + 2 ** 15
            values.append(value)
        data.append(frozenset(values))
    for filename in filesa:
        values = []
        for line in open(filename, "r").readlines():
            vs = line.strip().split()
            value = int(vs[1]) + 2 ** 15
            values.append(float(value))
        dataa.append(values)

    # Histogram each pair by its Dice set similarity,
    # 2 * |A & B| / (|A| + |B|), split into two counts by whether the
    # Pearson correlation of the raw series exceeds 0.8.
    n = 20
    sarray = [0 for i in xrange(0, n)]
    narray = [0 for i in xrange(0, n)]
    for i in xrange(0, len(data)):
        for j in xrange(i + 1, len(data)):
            similarity_p = correlate.pearson(dataa[i], dataa[j])
            similarity = 2.0 * float(len(data[i].intersection(data[j]))) \
                / float(len(data[i]) + len(data[j]))
            index = min(max(int(n * similarity), 0), n - 1)
            if similarity_p > 0.8:
                sarray[index] += 1
            else:
                narray[index] += 1

    index = 0
    for svalue, nvalue in zip(sarray, narray):
        print str(index) + " " + str(svalue) + " " + str(nvalue)
        index += 1
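# load_listfile is not defined in this file; a minimal sketch of the
# assumed helper, reading one path per line from a list file:
def load_listfile(listfile):
    filenames = []
    for line in open(listfile, "r").readlines():
        line = line.strip()
        if line:
            filenames.append(line)
    return filenames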
def main():
    import sys
    import pickle
    import correlate

    indexname = sys.argv[1]
    threshold = float(sys.argv[2])

    patterns = pickle.loads(open(indexname, "r").read())
    # Subsample the pattern list with the stride given in argv[3].
    patterns = patterns[::int(sys.argv[3])]

    # For every pattern pair, record the Pearson correlation, and group
    # with pattern i every later pattern whose correlation exceeds the
    # threshold.
    values = []
    matches = []
    for i in xrange(0, len(patterns)):
        matches.append([i])
        for k in xrange(i + 1, len(patterns)):
            value = correlate.pearson(patterns[i], patterns[k])
            values.append((i, k, value))
            if value > threshold:
                matches[i].append(k)

    # Average each non-trivial group into a representative pattern of
    # length argv[4].
    firstresult = []
    secondresult = []
    rpatterns = []
    plen = int(sys.argv[4])
    for ps in matches:
        if len(ps) <= 1:
            continue
        presult = []
        for k in xrange(0, plen):
            average = 0.0  # float, so the division below is not truncated
            for x in ps:
                average += patterns[x][k]
            average /= len(ps)
            presult.append(average)
        rpatterns.append(presult)

    for i, k, value in values:
        if value > threshold:
            firstresult.extend(patterns[i])
            secondresult.extend(patterns[k])
            print "(" + str(i) + " x " + str(k) + " = " + str(value) + ")"

    print pickle.dumps(rpatterns)

    rf = open(indexname + ".average", "w")
    for rpattern in rpatterns:
        for rvalue in rpattern:
            rf.write(str(rvalue) + "\n")
    rf.close()

    ff = open(indexname + ".first", "w")
    for value in firstresult:
        ff.write(str(value) + "\n")
    ff.close()

    sf = open(indexname + ".second", "w")
    for value in secondresult:
        sf.write(str(value) + "\n")
    sf.close()
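# Invocation sketch (argument meanings read off the code above; the script
# name is illustrative):
#
#   python cluster_patterns.py patterns.pickle 0.9 2 100
#
# argv[1]: pickled pattern list, argv[2]: correlation threshold,
# argv[3]: subsampling stride over the patterns, argv[4]: pattern length.
# Besides the printed output, the script writes the averaged patterns and
# the concatenated matching pairs to <indexname>.average, .first, .second.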