# NOTE: sorting the two vectors by pid was considered, but it does not make
# much sense:
# pid_vector, object_vector = zip(*sorted(zip(pids, system_objects)))

# Turn every raw record into a 5-element numeric feature tuple and collect
# the tuples in vectorized_dataset.
for record in raw_dataset:
    pid = record["pid"]
    date = record["date"]
    obj = record["object"]
    vectorized_dataset.append((
        objects.index(obj),   # object index
        pids.index(pid),      # event pid index
        dates.index(date),    # date index
        system_objects[obj],  # number of system obj occurrence
        process_ids[pid],     # number of pid occurrence
    ))

ksom = SOM(10, 10, vectorized_dataset, 100)

# Feed up to 100 randomly chosen vectors to the map. random.sample raises
# ValueError when asked for more items than the population holds, so the
# sample size is capped at the dataset length.
sample_size = min(100, len(vectorized_dataset))
for iv in random.sample(vectorized_dataset, sample_size):
    ksom.epoch(iv)

# Print every node as "(x,y):bmu_count", line_width nodes per output line.
# Rows are assembled as strings and emitted with a single-argument print(),
# which behaves the same under Python 2 and Python 3.
line_width = 10
row_cells = []
for index, node in enumerate(ksom.nodes):
    row_cells.append("(%d,%d):%d " % (node.x, node.y, node.bmu_count))
    # Compare by value: `is 0` tests object identity, which is only
    # accidentally true for small ints in CPython.
    if (index + 1) % line_width == 0:
        print("".join(row_cells))
        row_cells = []
if row_cells:
    # Flush a trailing partial row when the node count is not a multiple
    # of line_width.
    print("".join(row_cells))
vector_dict['3'], # token index of activity vector_dict['4'], # number of database operations vector_dict['5'], # number of content provider queries vector_dict['6'], # number of network operations ]''' dataset.append(vector) # dimension of the map (from config file) config_file = open('./som_config.js') configs = dict(map(lambda x:x[0:-1].split('='), config_file.read().strip().split('\n'))) config_values = configs.values() width = int(config_values[0]) height = int(config_values[1]) # init the self-organising map ksom = SOM(width, height, vector_dataset) # start training for activity_vector in random.sample(activity_dataset, len(activity_dataset)): ksom.epoch(activity_vector['vector'], activity_vector['name'], activity_vector['start_date']) pass # get SOM nodes collection in database collection = db.som_nodes # store the map to database for node in ksom.nodes: map_node = dict() map_node['x'] = node.x map_node['y'] = node.y map_node['features'] = node.weights_vector map_node['extra_data'] = node.extra_data