def mode(labels):
    """Return the most frequent element of *labels*.

    When several elements share the highest count, the one encountered
    first wins. An empty *labels* raises ``IndexError``.
    """
    ranked = Counter(labels).most_common(1)
    top_label, _occurrences = ranked[0]
    return top_label
def __init__(self):
    """Create an empty vocabulary with zeroed statistics."""
    # Two lookup directions: word -> index (mapping) and index -> word (list).
    self.word2idx = dict()
    self.idx2word = list()
    # Frequency table plus a running total, both starting from nothing.
    self.counter = Counter()
    self.total = 0
def count_words(sentence):
    """Count case-insensitive word occurrences in *sentence*.

    A word is a run of ASCII letters/digits, optionally followed by an
    apostrophe and more letters (so ``don't`` is a single word). Every other
    character, including newlines, acts as a separator.

    :param sentence: text to tokenise
    :returns: ``Counter`` mapping each lower-cased word to its frequency
    """
    # Newlines must stay in place as separators: stripping them would glue the
    # last word of one line onto the first word of the next.
    lowered = sentence.lower()
    return Counter(re.findall(r"[a-z0-9]+(?:'[a-z]+)?", lowered))
# Cluster the texts of each time window according to which of the window's
# burst words they contain, and print the largest clusters.
# NOTE(review): `num` is used as the index in `splitword` of the window's first
# text, but it is never advanced in the code visible here -- presumably it is
# updated further down; confirm.
for i in range(len(output)):  # cluster per time window
    word_list = [output[i][j][0] for j in range(len(output[i]))]  # list of burst words for this time window
    vecs = []
    for k in range(num, num + count_times[i]):
        vec = np.array([
            word_list[j] in splitword[k] for j in range(len(word_list))
        ])  # whether each burst word occurs in this text's tokens
        vecs.append(vec.astype(int))
    clf = Clusters(vecs)
    # The argument is the number of distinct tags collected so far; what
    # Clusters.classify does with it is not visible here.
    tag = clf.classify(len(set(tags)))
    tags.extend(tag)
    # Printed header reads roughly "clustering result for month %s".
    print('第%s月聚类结果):' % (dates[i]))
    result = Counter(tag)
    # Show at most the five most populated clusters.
    print(result.most_common(min(5, len(result))))
    if len(word_list) == 1:
        # A single burst word: the word list itself is taken as the grouping.
        classify = word_list[:]
    else:
        # One 0/1 vector per burst word, marking which texts of the window
        # contain that word.
        vecs_kw = []
        for j in range(len(word_list)):
            vec_kw = np.array([
                word_list[j] in splitword[m]
                for m in range(num, num + count_times[i])
            ])
            vecs_kw.append(vec_kw.astype(int))
        # Compute AP (affinity propagation)
        # ap = AffinityPropagation(preference=-26).fit(vecs_kw)
        # cluster_centers_indices = ap.cluster_centers_indices_  # indices of the predicted centers, e.g. [123,23,34]
def bulid_initial_char_prob(message):
    """Count how often each character appears as the first character of a word.

    Empty words carry no initial character and are skipped instead of
    raising ``IndexError``.

    :param message: iterable of words (strings)
    :returns: ``Counter`` mapping initial character to number of words
        starting with it (raw counts, not normalised)
    """
    return Counter(word[0] for word in message if word)
async def submit(self, req: Request) -> Response:
    """Handle a multipart submission made of submission.xml and the files it refers to.

    submission.xml is parsed first to learn which action(s) apply to each
    schema type. Every other uploaded file is then processed with the
    action(s) declared for its schema, and the collected results are returned.

    :param req: Multipart POST request with submission.xml and files
    :raises: HTTPBadRequest if submission.xml is missing or duplicated, if an
        action carries no information, or if an uploaded file has no action
        declared for its schema
    :returns: JSON response with the list of results, one per executed action
    """
    files = await _extract_xml_upload(req)
    schema_types = Counter(file[1] for file in files)
    if "submission" not in schema_types:
        reason = "There must be a submission.xml file in submission."
        LOG.error(reason)
        raise web.HTTPBadRequest(reason=reason)
    if schema_types["submission"] > 1:
        reason = "You should submit only one submission.xml file."
        LOG.error(reason)
        raise web.HTTPBadRequest(reason=reason)

    # Select submission.xml by its schema type rather than by upload position,
    # so the handler does not depend on the ordering of the extracted files.
    submission_xml = next(file[0] for file in files if file[1] == "submission")
    submission_json = XMLToJSONParser().parse("submission", submission_xml)

    # Check what actions should be performed, collect them per schema type.
    # Each schema always maps to a flat list so that any number of actions for
    # the same schema is handled uniformly.
    actions: Dict[str, List[str]] = {}
    for action_set in submission_json["actions"]["action"]:
        for action, attr in action_set.items():
            if not attr:
                reason = f"""You also need to provide necessary information for submission action. 
Now {action} was provided without any extra information."""
                LOG.error(reason)
                raise web.HTTPBadRequest(reason=reason)
            LOG.debug(f"submission has action {action}")
            actions.setdefault(attr["schema"], []).append(action)

    # Reject the request before executing anything if some uploaded file has
    # no action declared for its schema; otherwise the lookup below would fail
    # halfway through with a KeyError (HTTP 500).
    for file in files:
        schema_type = file[1]
        if schema_type != "submission" and schema_type not in actions:
            reason = f"No action was specified in submission.xml for a file with schema '{schema_type}'."
            LOG.error(reason)
            raise web.HTTPBadRequest(reason=reason)

    # Go through parsed files and do the actual action
    results: List[Dict] = []
    db_client = req.app["db_client"]
    for file in files:
        content_xml = file[0]
        schema_type = file[1]
        if schema_type == "submission":
            LOG.debug("file has schema of submission type, continuing ...")
            continue  # No need to use submission xml
        for item in actions[schema_type]:
            result = await self._execute_action(schema_type, content_xml, db_client, item)
            results.append(result)

    body = json.dumps(results)
    LOG.info(f"Processed a submission of {len(results)} actions.")
    return web.Response(body=body, status=200, content_type="application/json")