def add_partition(self, drv_name, cyl_range, dev_flags=0, flags=0,
                  dos_type=DosType.DOS0, boot_pri=0):
    # cyl range is not free anymore or invalid
    if not self.check_cyl_range(*cyl_range):
        return False
    # no space left for partition block
    if not self._has_free_rdb_blocks(1):
        return False
    # allocate block for partition
    blk_num = self._alloc_rdb_blocks(1)[0]
    self.used_blks.append(blk_num)
    self._update_hi_blk()
    # create a new partition block
    pb = PartitionBlock(self.rawblk, blk_num)
    heads = self.rdb.phy_drv.heads
    blk_per_trk = self.rdb.phy_drv.secs
    dos_env = PartitionDosEnv(low_cyl=cyl_range[0], high_cyl=cyl_range[1],
                              surfaces=heads, blk_per_trk=blk_per_trk,
                              dos_type=dos_type, boot_pri=boot_pri)
    pb.create(drv_name, dos_env, flags=flags)
    pb.write()
    # link block
    if len(self.parts) == 0:
        # write into RDB
        self.rdb.part_list = blk_num
    else:
        # write into last partition block
        last_pb = self.parts[-1]
        last_pb.part_blk.next = blk_num
        last_pb.write()
    # always write RDB as allocated block is stored there, too
    self.rdb.write()
    # create partition object and add to partition list
    # note: Partition expects blocks per cylinder, not blocks per track
    blk_per_cyl = blk_per_trk * heads
    p = Partition(self.rawblk, blk_num, len(self.parts), blk_per_cyl, self)
    p.read()
    self.parts.append(p)
    return True
def get_log_info(self, log_file):
    log_messages = []
    structure_log_info = []
    if self.df_log.empty:
        start = 0
        end = 0
    else:
        start = self.df_log.shape[0]
        end = start
    count = 0
    now = datetime.now()
    with open(log_file, 'r') as fin:
        for line in fin.readlines():
            # replace non-ASCII characters with a <NASCII> tag
            line = re.sub(r'[^\x00-\x7F]+', '<NASCII>', line)
            # extract the log fields from around the tags
            try:
                end += 1
                match = self.format_rex.search(line.strip())
                # match.group(header) selects the named groups defined in the format regex
                message = [match.group(header) for header in self.headers]
                log_messages.append(message)
                content = list(
                    filter(
                        lambda x: x != '',
                        re.split(r'[\s=:,]',
                                 self._preprocess(match.group('Content')))))
                lineId = end
                length = len(content)
                # each info object can be viewed as one partition
                info = Partition(logClustL=[])
                info.rootNode = Node()
                info.logClustL.append(
                    LCSObject(logTemplate=content,
                              logIDL=[lineId],
                              constLogTemplate=content))
                self._addSeqToPrefixTree(info.rootNode, info.logClustL[0])
                partition = (length, info)
                structure_log_info.append(partition)
                count += 1
                if count % 100000 == 0:
                    print(count)
                    print(datetime.now() - now)
                    now = datetime.now()
            except Exception as e:
                # print("exception raised: " + str(e))
                pass
    logdf = pd.DataFrame(log_messages, columns=self.headers)
    logdf.insert(0, 'LineId', None)
    logdf['LineId'] = [i + 1 for i in range(start, end)]
    if not self.df_log.empty:
        # DataFrame.append was removed in pandas 2.0; concat is equivalent
        self.df_log = pd.concat([self.df_log, logdf], ignore_index=True)
    else:
        self.df_log = logdf
    return structure_log_info
def __init__(self, path):
    self.Documents = []
    self.allowed = set(
        [chr(i) for i in xrange(ord('a'), ord('z') + 1)] +
        [chr(i) for i in xrange(ord('A'), ord('Z') + 1)] +
        # [',','-',' '] + [str(i) for i in xrange(10)])
        [',', '.', '?', '-', '!', ' '] + [str(i) for i in xrange(10)])
    self.punctuation = [';', ':', '&', '?', "/"]
    self.P = Partition(self.punctuation)
    self.tagger = PatternTagger()
    self.sw = StopWords()
    with open(path, 'r') as f:
        for line in f:
            line = line.strip()
            if line:
                self.Documents.append(line)
def __init__(self, knowledge):
    self.knowledge = knowledge
    self.num_user_turns = 0

    # Number of possible actions the goal could be referring to
    self.num_goal_actions = len(knowledge.goal_actions)
    # One extra because the action in an utterance is allowed to be None
    self.num_utterance_actions = self.num_goal_actions + 1

    # n - maximum possible number of parameters an action can have
    self.max_goal_params = len(knowledge.goal_params)
    # Number of possible values each parameter can take
    self.num_goal_param_values = len(knowledge.goal_params_values)

    # Create a partition covering all possible states
    possible_actions = knowledge.goal_actions
    possible_param_values = dict()
    possible_param_values['patient'] = knowledge.goal_params_values
    possible_param_values['location'] = knowledge.goal_params_values
    possible_param_values['recipient'] = knowledge.goal_params_values
    complete_partition = Partition(possible_actions, possible_param_values, 0.0)
    self.partitions = [complete_partition]

    # Hypotheses currently being tracked
    self.hypothesis_beliefs = None
    self.num_dialog_turns = 0
def open(self):
    # read RDB
    self.rdb = RDBlock(self.rawblk)
    if not self.rdb.read():
        self.valid = False
        return False
    # create used block list
    self.used_blks = [self.rdb.blk_num]
    # read partitions
    part_blk = self.rdb.part_list
    self.parts = []
    num = 0
    while part_blk != Block.no_blk:
        p = Partition(self.rawblk, part_blk, num, self.rdb.log_drv.cyl_blks, self)
        num += 1
        if not p.read():
            self.valid = False
            return False
        self.parts.append(p)
        # store used block
        self.used_blks.append(p.get_blk_num())
        # next partition
        part_blk = p.get_next_partition_blk()
    # read filesystems
    fs_blk = self.rdb.fs_list
    self.fs = []
    num = 0
    while fs_blk != PartitionBlock.no_blk:
        fs = FileSystem(self.rawblk, fs_blk, num)
        num += 1
        if not fs.read():
            self.valid = False
            return False
        self.fs.append(fs)
        # store used blocks
        self.used_blks += fs.get_blk_nums()
        # next filesystem
        fs_blk = fs.get_next_fs_blk()
    # TODO: add bad block blocks
    self.valid = True
    self.max_blks = self.rdb.log_drv.rdb_blk_hi + 1
    return True
def getDisks(self):
    """
    Enumerate all the disks present on the system, updating the cmdb object accordingly

    Create a disk instance for each disk on the system, populate the attributes
    and add it to the cmdb's hardDisks collection

    @return: cmdb.hardDisks
    """
    disks = j.cloud.cmdtools.inventoryScan.getDisks()
    currentAvailableDisks = list()
    for name, value in disks.iteritems():
        size = int(float(value['size']) * 1024) if value['unit'] == 'GB' else int(
            float(value['size']))
        partitions = value['partitions']
        currentAvailableDisks.append(name)
        if name in self.cmdb.disks.keys():
            self.cmdb.disks[name].name = name
            self.cmdb.disks[name].size = size
        else:
            disk = Disk()
            disk.name = name
            disk.size = size
            self.cmdb.disks[name] = disk
        if partitions:
            disk = self.cmdb.disks[name]
            disk.partitions = list()
            for part in partitions:
                partition = Partition(
                    part['Type'], part['number'], part['start'], part['end'],
                    int(float(part['size'][0:-3])),
                    part['mountpoint'] if 'mountpoint' in part else '',
                    part['used'] if 'used' in part else 0.0,
                    part['name'] if 'name' in part else '',
                    part['flag'] if 'flag' in part else '')
                if 'devices' in part:
                    partition.raid = PartitionRaid(
                        part['level'], part['state'], part['devices'],
                        part['activeDevices'], part['failedDevices'],
                        part['totalDevices'], part['raidDevices'],
                        part['spareDevices'], part['backendsize'])
                disk.partitions.append(partition)
    for disk in self.cmdb.disks.keys():
        if disk not in currentAvailableDisks:
            del self.cmdb.disks[disk]
    self.cmdb.dirtyProperties.add('disks')
    return disks
def add_partition(self, drv_name, cyl_range, dev_flags=0, flags=0,
                  dos_type=DosType.DOS0, boot_pri=0, more_dos_env=None):
    # cyl range is not free anymore or invalid
    if not self.check_cyl_range(*cyl_range):
        return False
    # no space left for partition block
    if not self._has_free_rdb_blocks(1):
        return False
    # allocate block for partition
    blk_num = self._alloc_rdb_blocks(1)[0]
    self.used_blks.append(blk_num)
    self._update_hi_blk()
    # create a new partition block
    pb = PartitionBlock(self.rawblk, blk_num)
    # setup dos env
    heads = self.rdb.phy_drv.heads
    blk_per_trk = self.rdb.phy_drv.secs
    dos_env = PartitionDosEnv(low_cyl=cyl_range[0], high_cyl=cyl_range[1],
                              surfaces=heads, blk_per_trk=blk_per_trk,
                              dos_type=dos_type, boot_pri=boot_pri)
    self._adjust_dos_env(dos_env, more_dos_env)
    pb.create(drv_name, dos_env, flags=flags)
    pb.write()
    # link block
    if len(self.parts) == 0:
        # write into RDB
        self.rdb.part_list = blk_num
    else:
        # write into last partition block
        last_pb = self.parts[-1]
        last_pb.part_blk.next = blk_num
        last_pb.write()
    # always write RDB as allocated block is stored there, too
    self.rdb.write()
    # flush out all changes before we read again
    self.rawblk.flush()
    # create partition object and add to partition list
    blk_per_cyl = blk_per_trk * heads
    p = Partition(self.rawblk, blk_num, len(self.parts), blk_per_cyl, self)
    p.read()
    self.parts.append(p)
    return True
def new_box(self):
    output = []
    for i in range(self.shape[0]):
        row = []
        for j in range(self.shape[1]):
            row.append(Partition((i, j)))
        output.append(row)
    return output
def MST_Kruskal(g):
    """Compute a minimum spanning tree of a graph using Kruskal's algorithm.

    Return a list of edges that comprise the MST.
    The elements of the graph's edges are assumed to be weights.
    """
    tree = []
    pq = AdaptableHeapPriorityQueue()
    forest = Partition()
    position = {}

    for v in g.vertices():
        position[v] = forest.make_group(v)

    for e in g.edges():
        pq.add(e.element(), e)  # edge's element is assumed to be its weight

    size = g.vertex_count()
    # tree not spanning and unprocessed edges remain
    while len(tree) != size - 1 and len(pq) != 0:
        # remove_min returns a (key, value) pair, i.e. (weight, edge)
        wgt, e = pq.remove_min()
        u, v = e.endpoints()
        a, b = forest.find(position[u]), forest.find(position[v])
        if a != b:
            forest.union(a, b)
            tree.append(e)
    return tree
def getDisks(self):
    """
    Enumerate all the disks present on the system, updating the cmdb object accordingly

    Create a disk instance for each disk on the system, populate the attributes
    and add it to the cmdb's hardDisks collection

    @return: cmdb.hardDisks
    """
    disks = j.cloud.cmdtools.inventoryScan.getDisks()
    currentAvailableDisks = list()
    for name, value in disks.iteritems():
        size = int(float(value['size']) * 1024) if value['unit'] == 'GB' else int(float(value['size']))
        partitions = value['partitions']
        currentAvailableDisks.append(name)
        if name in self.cmdb.disks.keys():
            self.cmdb.disks[name].name = name
            self.cmdb.disks[name].size = size
        else:
            disk = Disk()
            disk.name = name
            disk.size = size
            self.cmdb.disks[name] = disk
        if partitions:
            disk = self.cmdb.disks[name]
            disk.partitions = list()
            for part in partitions:
                partition = Partition(part['Type'], part['number'], part['start'],
                                      part['end'], int(float(part['size'][0:-3])),
                                      part['mountpoint'] if 'mountpoint' in part else '',
                                      part['used'] if 'used' in part else 0.0,
                                      part['name'] if 'name' in part else '',
                                      part['flag'] if 'flag' in part else '')
                if 'devices' in part:
                    partition.raid = PartitionRaid(part['level'], part['state'],
                                                   part['devices'], part['activeDevices'],
                                                   part['failedDevices'], part['totalDevices'],
                                                   part['raidDevices'], part['spareDevices'],
                                                   part['backendsize'])
                disk.partitions.append(partition)
    for disk in self.cmdb.disks.keys():
        if disk not in currentAvailableDisks:
            del self.cmdb.disks[disk]
    self.cmdb.dirtyProperties.add('disks')
    return disks
def __init__(self, shape=(0, 0)) -> None:
    self.shape = shape
    self.partitions = []
    for i in range(shape[0]):
        row = []
        for j in range(shape[1]):
            row.append(Partition((i, j)))
        self.partitions.append(row)
def __init__(self, path):
    data_home = os.path.split(path)[0]
    self.Documents = []
    self.allowed = set(
        [chr(i) for i in xrange(ord('a'), ord('z') + 1)] +
        [chr(i) for i in xrange(ord('A'), ord('Z') + 1)] +
        # [',','-',' '] + [str(i) for i in xrange(10)])
        [',', '.', '?', '-', '!', ' '] + [str(i) for i in xrange(10)])
    punctuation = [';', ':', '&', '?', "/"]
    # P = Partition(punctuation)
    self.tagger = PatternTagger()
    with open(path, 'r') as f:
        for line in f.readlines():
            li = line.split("\t")[1].strip()
            if li:
                self.Documents.append(li)
    data_Inter_path = os.path.join(data_home, "Intermediate")
    self.inter = data_Inter_path
    self.P = Partition(punctuation, data_Inter_path, data_home)
    self.sw = StopWords(data_home)
def MST_Kruskal(g):
    """
    Compute a minimum spanning tree of a graph using Kruskal's algorithm.

    Return a list of edges that comprise the MST.
    The elements of the graph's edges are assumed to be weights.
    """
    tree = []                 # list of edges in spanning tree
    pq = HeapPriorityQueue()  # entries are edges in G with weights as key
    forest = Partition()      # keeps track of forest clusters
    position = {}             # map each node to its Partition entry

    for v in g.vertices():
        position[v] = forest.make_group(v)

    for e in g.edges():
        pq.add(e.element(), e)  # edge's element is assumed to be its weight

    size = g.vertex_count()
    # tree not spanning and unprocessed edges remain
    while len(tree) != size - 1 and not pq.is_empty():
        weight, edge = pq.remove_min()
        u, v = edge.endpoints()
        a = forest.find(position[u])
        b = forest.find(position[v])
        if a != b:
            tree.append(edge)
            forest.union(a, b)
    return tree
def MST_Kruskal(g):
    tree = []
    pq = HeapPriorityQueue()
    forest = Partition()
    position = {}
    for v in g.vertices():
        position[v] = forest.make_group(v)
    for e in g.edges():
        pq.add(e.element(), e)
    size = g.vertex_count()
    while len(tree) != size - 1 and not pq.is_empty():
        weight, edge = pq.remove_min()
        u, v = edge.endpoints()
        a = forest.find(position[u])
        b = forest.find(position[v])
        if a != b:
            tree.append(edge)
            forest.union(a, b)
    return tree
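# The MST_Kruskal variants above all rely on a union-find `Partition` that
# exposes make_group/find/union over opaque positions. A minimal sketch of
# such a structure (the make_group/find/union names come from the snippets;
# the union-by-size/path-compression internals here are illustrative, not
# the original implementation):
class Partition:
    """Union-find (disjoint set) structure."""

    class Position:
        __slots__ = '_container', '_element', '_size', '_parent'

        def __init__(self, container, element):
            self._container = container  # reference back to owning Partition
            self._element = element
            self._size = 1
            self._parent = self          # a new position leads its own group

        def element(self):
            return self._element

    def make_group(self, element):
        """Create a singleton group containing element; return its Position."""
        return self.Position(self, element)

    def find(self, p):
        """Return the leader of the group containing p (with path compression)."""
        if p._parent is not p:
            p._parent = self.find(p._parent)
        return p._parent

    def union(self, p, q):
        """Merge the groups led by p and q (union by size)."""
        a, b = self.find(p), self.find(q)
        if a is not b:
            if a._size > b._size:
                a, b = b, a
            a._parent = b       # attach the smaller group under the larger
            b._size += a._size

# tiny demo: after union, x and y share a leader distinct from z
P = Partition()
x, y, z = P.make_group('x'), P.make_group('y'), P.make_group('z')
P.union(x, y)
assert P.find(x) is P.find(y) and P.find(x) is not P.find(z)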
def __partitionMatch(self, logmessageL, logID):
    """
    Match logmessageL against the log templates of its length group
    via LCS comparison.
    :param logmessageL: log message to be matched
    :param logID: log ID
    :return: the match result
    """
    length = len(logmessageL)
    LCSClust = None
    # if no group exists yet for templates of this length, create one directly
    if length not in self.group:
        # when loading previously trained results, logmessageL is an old
        # training template that carries no ID; this is signalled by a
        # negative logID
        if logID > 0:
            LCSClust = LCSObject(logmessageL, [logID],
                                 [s for s in logmessageL if s != '<*>'])
        else:
            LCSClust = LCSObject(logmessageL, [],
                                 [s for s in logmessageL if s != '<*>'])
        LCSClustL = [LCSClust]
        self.group[length] = Partition(LCSClustL)
    # otherwise match within the existing group for this length
    else:
        LCS, matchObject = self._LCSMatch(self.group[length].logClustL,
                                          logmessageL, self.max)
        # on a failed match, add a new template
        if not LCS:
            if logID > 0:
                LCSClust = LCSObject(logmessageL, [logID],
                                     [s for s in logmessageL if s != '<*>'])
            else:
                LCSClust = LCSObject(logmessageL, [],
                                     [s for s in logmessageL if s != '<*>'])
            self.group[length].logClustL.append(LCSClust)
        # on a successful match, take the LCS as the new template, drop the
        # old template's info, and decide in the merge phase whether to add it
        else:
            template = self._getTemplate(LCS, logmessageL)
            if ' '.join(matchObject.logTemplate) != ' '.join(template):
                LCSClust = matchObject
                matchObject.logTemplate = template
                matchObject.constLogTemplate = [s for s in template if s != '<*>']
                if logID > 0:
                    matchObject.logIDL.append(logID)
                if matchObject in self.logClustL:
                    self.logClustL.remove(matchObject)
                    self._removeSeqFromPrefixTree(self.rootNode, matchObject)
    return LCSClust
def __init__(self, path):
    self.Documents = []
    self.allowed = set(
        [chr(i) for i in xrange(ord('a'), ord('z') + 1)] +
        [chr(i) for i in xrange(ord('A'), ord('Z') + 1)] +
        # [',','-',' '] + [str(i) for i in xrange(10)])
        [',', '.', '?', '-', '!', ' '] + [str(i) for i in xrange(10)])
    self.punctuation = [';', ':', '&', '?', "/"]
    self.P = Partition(self.punctuation)
    self.tagger = PatternTagger()
    self.sw = StopWords()
    with open(path, 'r') as f:
        for line in f:
            line = line.strip()
            if line:
                self.Documents.append(line)
def generate_partitions(self, target_af):
    '''
    Args:
    - `target_af`: Total af of all partitions to reach.
    '''
    partition_set = []
    # generate utilizations based on the number of tasks generated
    afs = self.gen_kato_utilizations(target_af, 0.1, 1)
    num = len(afs)
    for i in range(num):
        reg = random.randint(1, 2)
        # only generates regular partitions
        partition_now = Partition(afs[i], reg)
        # print afs[i]
        partition_set.append(partition_now)
    return partition_set
def generate_partitions(self, target_af):
    '''
    Args:
    - `target_af`: Total af of all partitions to reach.
    '''
    partition_set = {}
    # generate utilizations based on the number of tasks generated
    afs = self.gen_kato_utilizations(target_af, 0, 1)
    num = len(afs)
    for i in range(num):
        # only generates regular partitions
        partition_now = Partition(i, afs[i])
        # print afs[i]
        partition_set[i] = partition_now
    return partition_set
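# Both generate_partitions variants call a gen_kato_utilizations(target, min, max)
# helper. A minimal sketch, assuming it follows the usual Kato-style scheme of
# drawing uniform utilizations until the target total is reached (the name and
# signature come from the snippets above; this body is an assumption, not the
# original):
import random

def gen_kato_utilizations(target, umin, umax):
    """Draw values uniformly from [umin, umax] until they sum to `target`,
    clamping the final draw so the total is exact."""
    utils = []
    total = 0.0
    while total < target:
        u = min(random.uniform(umin, umax), target - total)  # clamp last value
        utils.append(u)
        total += u
    return utils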
def system_init():
    print('type: {}'.format(type(osm_map)))
    # sub_graph = osm_map.subgraph([4548141057, 4548141062, 4548141067, 1457872913, 4548141073])
    # E = list(sub_graph.nodes)
    # print(E)
    # input()
    print('System Initiating...')
    taxi_table = pd.read_csv('./data/taxi_info_list.csv')
    df = pd.read_csv('./data/node_list_with_cluster.csv')
    for indexs in df.index:
        tmp = df.loc[indexs]
        node_list.append(
            Node(tmp['real_id'], tmp['lon'], tmp['lat'], int(tmp['cluster_id'])))

    # holds the lon/lat of each partition's landmark; its index corresponds
    # one-to-one with the index into partition_list
    landmark_table = pd.read_csv('./data/landmark.csv')
    global landmark_list
    landmark_list = list(
        zip(landmark_table.loc[:, 'lon'], landmark_table.loc[:, 'lat'],
            landmark_table.loc[:, 'landmark_node_id']))

    global partition_list
    partition_list = [None] * (max(df.loc[:, 'cluster_id']) + 1)
    # initialize all partition instances
    for node_it in node_list:
        cid = node_it.cluster_id_belongto
        if partition_list[cid] is None:
            partition_list[cid] = Partition(cid, node_list=[], taxi_list=[])
        partition_list[cid].node_list.append(int(node_it.node_id))

    global taxi_list
    for taxi_it in taxi_table.index:
        tmp = taxi_table.loc[taxi_it]
        taxi_in_which_partition = check_in_which_partition(tmp['init_lon'],
                                                           tmp['init_lat'])
        taxi_list.append(
            Taxi(int(tmp['taxi_id']), tmp['init_lon'], tmp['init_lat'],
                 SYSTEM_INIT_TIME - TIME_OFFSET,
                 partition_id_belongto=taxi_in_which_partition,
                 seat_left=3))
        partition_list[taxi_in_which_partition].taxi_list.append(
            int(tmp['taxi_id']))

    # initialize the adjacency matrix
    global node_distance_matrix
    node_distance_matrix = copy.copy(node_distance.values)
def __init__(self, path):
    self.Documents = []
    self.allowed = set(
        [chr(i) for i in xrange(ord("a"), ord("z") + 1)]
        + [chr(i) for i in xrange(ord("A"), ord("Z") + 1)]
        # [',','-',' '] + [str(i) for i in xrange(10)])
        + [",", ".", "?", "-", "!", " "]
        + [str(i) for i in xrange(10)]
    )
    self.punctuation = [";", ":", "&", "?", "/"]
    self.P = Partition(self.punctuation)
    self.tagger = PatternTagger()
    self.sw = StopWords()
    with open(path, "r") as f:
        for line in f:
            line = line.strip()
            if line:
                self.Documents.append(line)
def TestEOC(p, n_start, runs, a, b, appr):
    dic = {
        "Midpoint": MidpointApproximation,
        "Linear": LinearContApproximation,
        "L2": L2ConstantApproximation
    }

    # define the target function
    if p > 0:
        f = lambda x: x**p
    else:
        f = lambda x: -1 / (np.log(x / np.e))

    # build the array of all the tested "n" values
    ns = [n_start * (2**n) for n in range(runs)]

    # initialize the error manager with the relevant norm
    if appr == "L2":
        EM = ErrorManager(L2Norm)
    else:
        EM = ErrorManager(MaxNorm)

    interp = None
    for n in ns:  # for each prescribed n
        u = Partition.Uniform(a, b, n)
        # build the prescribed approximation on the interval
        interp = dic[appr](u, f)
        # hand the result to the ErrorManager so it can compute errors and EOC
        EM.PushError(interp, points=50)

    # put the error data in a nice table so we can read it
    data = {"N": EM.dofs, "Error": EM.errors, "EOC": EM.EOC}
    d = PD.DataFrame(data)

    # use `fig` here so the figure does not shadow the target function f
    fig, (ax1, ax2) = plt.subplots(1, 2, sharey=False, figsize=(16, 6))
    # set axis limits
    ax1.set_xlim((-0.05, 1.05))
    ax1.set_ylim((-0.05, 1.05))
    interp.plot(ax1)  # plot the function
    EM.PlotEOC(ax2)   # plot the EOC graph
    return d          # return the table
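# A hypothetical invocation of TestEOC, assuming the approximation classes,
# norms and ErrorManager referenced above are in scope (the argument values
# are illustrative only):
# approximate f(x) = x**2 on [0, 1] with the midpoint rule, doubling n from 4
# over five runs, and print the resulting N/Error/EOC table
table = TestEOC(p=2, n_start=4, runs=5, a=0, b=1, appr="Midpoint")
print(table)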
def Kruskal(g: Graph):
    '''
    Compute MST via Kruskal's algorithm.

    Return a list of edges that comprise the MST.
    The elements of the graph's edges are assumed to be weights.
    '''
    # list of edges in a spanning tree
    tree = []
    # entries in pq are edges in G, with weights as keys
    pq = AdaptableHeapPriorityQueue()
    # keeps track of forest clusters
    forest = Partition()
    # map each node to its Partition entry
    position = {}

    v: Graph.Vertex
    for v in g.vertices():
        position[v.element()] = forest.make_group(v)

    e: Graph.Edge
    for e in g.edges():
        pq.add(e.weight(), e)

    size = g.vertex_count()
    # while tree not spanning and unprocessed edges remain
    while len(tree) != size - 1 and not pq.is_empty():
        edge: Graph.Edge
        weight, edge = pq.remove_min()
        u, v = edge.endpoints()
        a = forest.find(position[u.element()])
        b = forest.find(position[v.element()])
        if a != b:
            tree.append(edge)
            forest.union(a, b)
    return tree
def solve_MST(graph):
    heap = Heap(compare_fn=lambda e1, e2: 1 if e1.obj < e2.obj else
                -1 if e1.obj > e2.obj else 0)
    for edge in graph.edges:
        heap.insert(edge)
    part = Partition(value_fn=lambda v: v.obj, hash_fn=lambda v: hash(v.obj))
    final = List()
    while len(heap) > 0 or (len(final) == len(graph.vertices)
                            and part.subsets == 1):
        edge = heap.pop()
        v1 = edge.v1
        v2 = edge.v2
        if v1 not in part and v2 not in part:
            part.add(v1)
            part.add(v2)
            part.set_union(v1, v2)
            final.insert(edge)
        elif (v1 in part) ^ (v2 in part):
            if v1 not in part:
                part.add(v1)
            if v2 not in part:
                part.add(v2)
            part.set_union(v1, v2)
            final.insert(edge)
        elif not part.redundant(v1, v2):
            final.insert(edge)
            part.set_union(v1, v2)
    print(len(final))
    for edge in final:
        print(edge)
    return final
def create_initial_policy(self):
    probs = [x * 0.1 for x in xrange(0, 10)]
    num_goals = range(0, len(self.knowledge.goal_actions))
    num_uncertain_params = range(0, len(self.knowledge.goal_params)) + [sys.maxint]
    num_dialog_turns = range(0, 10)
    yes_no = ['yes', 'no']
    utterance_type = [None, 'inform_full', 'inform_param', 'affirm', 'deny']
    values = list(itertools.product(*[probs, probs, num_goals,
                                      num_uncertain_params, num_dialog_turns,
                                      yes_no, utterance_type]))

    # Warning: Make sure you have a default value for every param in
    # Knowledge.goal_params
    default_param_values = dict()
    default_param_values['patient'] = ['alice']
    default_param_values['recipient'] = ['hamburger']
    default_param_values['location'] = ['l3_512']

    examples = list()
    for (top_prob, sec_prob, num_goals, num_uncertain_params,
         num_dialog_turns, match, utterance_type) in values:
        if num_goals != 1 and num_uncertain_params != sys.maxint:
            continue
        elif num_goals == 1 and num_uncertain_params == sys.maxint:
            continue
        elif num_goals != len(self.knowledge.goal_actions) and utterance_type is None:
            continue
        s = SummaryState()
        s.knowledge = self.knowledge
        s.top_hypothesis_prob = top_prob
        s.second_hypothesis_prob = sec_prob
        s.num_dialog_turns = num_dialog_turns
        if num_goals != 1:
            if utterance_type is not None:
                utterance = Utterance(utterance_type)
                s.top_hypothesis = (Partition(self.knowledge.goal_actions[0:num_goals]),
                                    utterance)
                if match == 'yes':
                    s.second_hypothesis = (Partition(self.knowledge.goal_actions[0:num_goals]),
                                           utterance)
            if s.get_feature_vector() != [top_prob, sec_prob, num_goals,
                                          num_uncertain_params, num_dialog_turns,
                                          match, utterance_type]:
                print 'Problem!'
                print s.get_feature_vector()
                print (top_prob, sec_prob, num_goals, num_uncertain_params,
                       num_dialog_turns, match, utterance_type)
                print '\n'
            examples.append((s, 'repeat_goal'))
        else:
            # for goal in self.knowledge.goal_actions :
            goal = 'remind'
            params = dict()
            param_order = self.knowledge.param_order[goal]
            if num_uncertain_params > len(param_order):
                continue
            num_certain_params = len(param_order) - num_uncertain_params
            # print 'len(param_order) = ', len(param_order)
            # print 'num_uncertain_params = ', num_uncertain_params
            # print 'num_certain_params = ', num_certain_params
            for (idx, param_name) in enumerate(param_order):
                if idx < num_certain_params:
                    params[param_name] = default_param_values[param_name]
                else:
                    params[param_name] = self.knowledge.goal_params_values
            s = SummaryState()
            s.knowledge = self.knowledge
            s.top_hypothesis_prob = top_prob
            s.second_hypothesis_prob = sec_prob
            s.num_dialog_turns = num_dialog_turns
            if utterance_type is not None:
                utterance = Utterance(utterance_type)
                s.top_hypothesis = (Partition([goal], params), utterance)
                if match == 'yes':
                    s.second_hypothesis = (Partition([goal], params), utterance)
            if s.get_feature_vector() != [top_prob, sec_prob, num_goals,
                                          num_uncertain_params, num_dialog_turns,
                                          match, utterance_type]:
                print 'Problem!'
                print s.get_feature_vector()
                print (top_prob, sec_prob, num_goals, num_uncertain_params,
                       num_dialog_turns, match, utterance_type)
                print s.top_hypothesis[0].possible_param_values
                print '\n'
            if num_uncertain_params == 0:
                if top_prob < 0.3:
                    action = 'request_missing_param'
                elif top_prob < 0.9:
                    action = 'confirm_action'
                else:
                    action = 'take_action'
            else:
                if num_uncertain_params > 0:
                    action = 'request_missing_param'
            examples.append((s, action))

    # print len(examples), 'examples'
    D = list()
    mean = []
    cov = []
    actions = self.knowledge.summary_system_actions
    for (b, a) in examples:
        for a_prime in actions:
            D.append((b, a_prime))
            if a == a_prime:
                mean.append(1.0)
            else:
                mean.append(0.0)
    # print 'len(D) = ', len(D)
    cov = np.matrix(np.zeros((len(D), len(D))))
    for i in range(0, len(D)):
        cov[(i, i)] = 0.1
    self.D = D
    self.mu = np.matrix([[x] for x in mean])
    self.C = cov
class Clean:
    def __init__(self, path):
        self.Documents = []
        self.allowed = set(
            [chr(i) for i in xrange(ord('a'), ord('z') + 1)] +
            [chr(i) for i in xrange(ord('A'), ord('Z') + 1)] +
            # [',','-',' '] + [str(i) for i in xrange(10)])
            [',', '.', '?', '-', '!', ' '] + [str(i) for i in xrange(10)])
        self.punctuation = [';', ':', '&', '?', "/"]
        self.P = Partition(self.punctuation)
        self.tagger = PatternTagger()
        self.sw = StopWords()
        with open(path, 'r') as f:
            for line in f:
                line = line.strip()
                if line:
                    self.Documents.append(line)

    def is_number(self, s):
        try:
            float(s)
            return True
        except ValueError:
            return False

    def remove_stopwords(self, words, pos):
        new_sent = []
        new_pos = []
        for i in xrange(len(words)):
            if not self.sw.isStopWord(words[i]):
                new_sent.append(words[i])
                new_pos.append(pos[i])
        return new_sent, new_pos

    def replace_nums(self, s):
        sent = str(s)
        if sent[len(sent) - 1] == ".":
            sent = sent[0:len(sent) - 1]
        sent = sent.split()
        new_sent = []
        for word in sent:
            if self.is_number(word):
                pass
                # new_sent.append("999999")
            else:
                new_sent.append(word)
        sent = " ".join(new_sent)
        return sent

    def remove_things(self, string):
        string = string.replace("\t", " ")
        string = string.replace(" and ", ", and ")
        new_string = [char for char in string if char in self.allowed]
        return "".join(new_string)

    def clean_and_tag(self):
        with open('Intermediate/full_sentences.txt', 'w') as f,\
             open('Intermediate/full_pos.txt', 'w') as g,\
             open('Intermediate/sentences.txt', 'w') as m,\
             open('Intermediate/pos.txt', 'w') as n:
            for i in xrange(len(self.Documents)):
                if i % 10000 == 0 and i != 0:
                    print str(i) + " documents processed."
                doc = self.Documents[i]
                cleaned_doc = self.remove_things(doc)
                blob = TextBlob(cleaned_doc)
                for j in xrange(len(blob.sentences)):
                    sent = blob.sentences[j]
                    sent = self.replace_nums(sent)
                    split_sentence = self.P.split(sent)
                    for k in xrange(len(split_sentence)):
                        frag = split_sentence[k]
                        sent_blob = TextBlob(frag, pos_tagger=self.tagger)
                        words, pos = [], []
                        for word, tag in sent_blob.pos_tags:
                            words.append(word)
                            pos.append(tag)
                        f.write(str(i) + ":" + str(j) + ":" + str(k) + ":" +
                                (" ".join(words) + "\n"))
                        g.write(" ".join(pos) + "\n")
                        no_stop_words, no_stop_pos = self.remove_stopwords(words, pos)
                        m.write(str(i) + ":" + str(j) + ":" + str(k) + ":" +
                                (" ".join(no_stop_words) + "\n"))
                        n.write(" ".join(no_stop_pos) + "\n")
def create_partitions_info(self):
    return [
        Partition(partition, usage)
        for partition, usage in self.disk_usage_for_partitions()
    ]
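# create_partitions_info depends on a disk_usage_for_partitions() helper that
# yields (partition, usage) pairs. A plausible sketch built on psutil (the
# helper name comes from the snippet above; pairing psutil's partition listing
# with per-mountpoint usage is an assumption about the original):
import psutil

def disk_usage_for_partitions():
    """Yield (partition, usage) pairs for each mounted partition, skipping
    mountpoints that cannot be statted (e.g. pseudo-filesystems)."""
    for part in psutil.disk_partitions():
        try:
            usage = psutil.disk_usage(part.mountpoint)
        except OSError:
            continue
        yield part, usage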
class Clean:
    def __init__(self, path):
        self.Documents = []
        self.allowed = set(
            [chr(i) for i in xrange(ord('a'), ord('z') + 1)] +
            [chr(i) for i in xrange(ord('A'), ord('Z') + 1)] +
            # [',','-',' '] + [str(i) for i in xrange(10)])
            [',', '.', '?', '-', '!', ' '] + [str(i) for i in xrange(10)])
        self.punctuation = [';', ':', '&', '?', "/"]
        self.P = Partition(self.punctuation)
        self.tagger = PatternTagger()
        self.sw = StopWords()
        with open(path, 'r') as f:
            for line in f:
                line = line.strip()
                if line:
                    self.Documents.append(line)

    def is_number(self, s):
        try:
            float(s)
            return True
        except ValueError:
            return False

    def remove_stopwords(self, words, pos):
        new_sent = []
        new_pos = []
        for i in xrange(len(words)):
            if not self.sw.isStopWord(words[i]):
                new_sent.append(words[i])
                new_pos.append(pos[i])
        return new_sent, new_pos

    def replace_nums(self, s):
        sent = str(s)
        if sent[len(sent) - 1] == ".":
            sent = sent[0:len(sent) - 1]
        sent = sent.split()
        new_sent = []
        for word in sent:
            if self.is_number(word):
                pass
                # new_sent.append("999999")
            else:
                new_sent.append(word)
        sent = " ".join(new_sent)
        return sent

    def remove_things(self, string):
        string = string.replace("\t", " ")
        string = string.replace(" and ", ", and ")
        new_string = [char for char in string if char in self.allowed]
        return "".join(new_string)

    def clean_and_tag(self):
        with open('Intermediate/full_sentences.txt', 'w') as f,\
             open('Intermediate/full_pos.txt', 'w') as g,\
             open('Intermediate/sentences.txt', 'w') as m,\
             open('Intermediate/pos.txt', 'w') as n:
            for i in xrange(len(self.Documents)):
                if i % 10000 == 0 and i != 0:
                    print str(i) + " documents processed."
                doc = self.Documents[i]
                cleaned_doc = self.remove_things(doc)
                blob = TextBlob(cleaned_doc)
                for j in xrange(len(blob.sentences)):
                    sent = blob.sentences[j]
                    sent = self.replace_nums(sent)
                    split_sentence = self.P.split(sent)
                    for k in xrange(len(split_sentence)):
                        frag = split_sentence[k]
                        sent_blob = TextBlob(frag, pos_tagger=self.tagger)
                        words, pos = [], []
                        for word, tag in sent_blob.pos_tags:
                            words.append(word)
                            pos.append(tag)
                        f.write(str(i) + ":" + str(j) + ":" + str(k) + ":" +
                                (" ".join(words) + "\n"))
                        g.write(" ".join(pos) + "\n")
                        no_stop_words, no_stop_pos = self.remove_stopwords(words, pos)
                        m.write(str(i) + ":" + str(j) + ":" + str(k) + ":" +
                                (" ".join(no_stop_words) + "\n"))
                        n.write(" ".join(no_stop_pos) + "\n")
def partition_with_overlap(self, base_partition_weight, forward_overlap, backward_overlap):
    """
    Partition the graph into overlapping partitions, based on a weight
    criterion and forward/backward overlaps, all given in minutes.

    The method first partitions the graph with the partition method and
    then adds the overlap.
    """
    if not (isinstance(base_partition_weight, float)
            and isinstance(forward_overlap, int)
            and isinstance(backward_overlap, int)):
        raise TypeError(
            "Please only call this function with float, int and int as the arguments")

    self.partition(base_partition_weight)

    # Create partition dictionary
    self.partitions = dict(zip(range(len(self.partitions)), self.partitions))
    for i in self.partitions:
        self.reset_partition_id(self.partitions[i], i)

    # Generate partition graph
    self.partition_graph = dict()
    for index in self.partitions:
        part = Partition(index)
        part.add_nodes(self.partitions[index])
        end_nodes = self.find_partition_edge_nodes(self.partitions[index], end=True)
        start_nodes = self.find_partition_edge_nodes(self.partitions[index], end=False)
        for node in end_nodes:
            for f_node in self.adjacency_dict[node].forward_nodes:
                part.add_forward_partition(self.adjacency_dict[f_node].partition_id)
        for node in start_nodes:
            for b_node in self.adjacency_dict[node].backward_nodes:
                part.add_backward_partition(self.adjacency_dict[b_node].partition_id)
        self.partition_graph[index] = part

    # Generate overlapping partitions
    for index in self.partition_graph:
        for next_part in self.partition_graph[index].forward_partitions:
            self.forward_overlap_helper(index, next_part, 0, forward_overlap)
        for next_part in self.partition_graph[index].backward_partitions:
            self.backward_overlap_helper(index, next_part, 0, backward_overlap)

    # Clean up overlap between forward nodes and backward nodes
    for index in self.partition_graph:
        c_nodes = self.partition_graph[index].nodes
        f_nodes = self.partition_graph[index].forward_nodes
        b_nodes = self.partition_graph[index].backward_nodes
        self.partition_graph[index].forward_nodes = f_nodes - b_nodes - c_nodes
        self.partition_graph[index].backward_nodes = b_nodes - c_nodes
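# A hypothetical call to the method above, assuming `graph` is an instance of
# the surrounding class with its adjacency_dict populated (argument values are
# illustrative; note the deliberate float/int types enforced by the TypeError
# check):
graph.partition_with_overlap(30.0, 10, 5)
for index, part in graph.partition_graph.items():
    print(index, len(part.nodes), len(part.forward_nodes), len(part.backward_nodes))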
cluster = clusters[i]
if len(cluster) == 0:
    continue
color_val = scalar_map.to_rgba(i)
for line in cluster:
    plt.arrow(line.a[0], line.a[1], line.vector[0], line.vector[1],
              color=color_val)


"""
Setup! Processes the first n lines of the csv to begin the clustering.
"""
partitioner = Partition(LIKELIHOOD_THRES, MIN_VELOCITY)
clusterer = Cluster(EPSILON, MIN_LINES)
partitioner.pre_process(FILE_NAME, 0, 4000)
partitions = partition(partitioner)
clusters = clusterer.segment_cluster(partitions[0])
for cluster in clusterer.segment_cluster(partitions[1]):
    clusters.append(cluster)

fig = plt.figure()
cmap = plt.cm.jet
# plt.ion()
img = mpimg.imread("ref.png")
plt.imshow(img)
plot_clusters(clusters)
# plt.draw()

"""
Real time!