コード例 #1
0
def mode(labels):
    """Return the most frequent element of ``labels``.

    :param labels: iterable of hashable items
    :returns: the item with the highest count, as chosen by
        ``Counter.most_common``
    :raises IndexError: if ``labels`` is empty
    """
    frequencies = Counter(labels)
    top_entries = frequencies.most_common(1)
    winner, _occurrences = top_entries[0]
    return winner
コード例 #2
0
 def __init__(self):
     """Start with empty word/index tables, an empty counter and a zero total."""
     self.total = 0
     self.counter = Counter()
     # Two lookup directions: word -> index (mapping) and index -> word (sequence).
     self.idx2word = list()
     self.word2idx = dict()
コード例 #3
0
def count_words(sentence):
    """Count the occurrences of each word in ``sentence``, case-insensitively.

    A word is a run of ASCII letters/digits, optionally followed by an
    apostrophe and more letters (e.g. ``don't``). Every other character,
    including newlines, acts as a separator.

    :param sentence: text to tokenize
    :returns: Counter mapping each lower-cased word to its number of occurrences
    """
    # Newlines are deliberately left in place: deleting them would glue the
    # last word of one line to the first word of the next ("a\nb" -> "ab").
    return Counter(re.findall(r"[a-z0-9]+(?:'[a-z]+)?", sentence.lower()))
コード例 #4
0
# Cluster the texts of each time window using binary "burst word present"
# feature vectors, then print the most common cluster tags for that window.
# NOTE(review): `output`, `splitword`, `count_times`, `num`, `tags`, `dates`
# and `Clusters` are defined outside this fragment; presumably `num` is the
# offset of the window's first text in `splitword` and `count_times[i]` the
# number of texts in window i — confirm against the surrounding code.
for i in range(len(output)):  # cluster per time window
    word_list = [output[i][j][0]
                 for j in range(len(output[i]))]  # list of burst words for this time window

    vecs = []
    for k in range(num, num + count_times[i]):
        vec = np.array([
            word_list[j] in splitword[k] for j in range(len(word_list))
        ])  # whether each burst word occurs in this text's tokens
        vecs.append(vec.astype(int))
    clf = Clusters(vecs)
    tag = clf.classify(len(set(tags)))
    tags.extend(tag)

    print('第%s月聚类结果):' % (dates[i]))
    result = Counter(tag)
    # Show at most the five most frequent cluster tags.
    print(result.most_common(min(5, len(result))))

    if len(word_list) == 1:
        # A single burst word: use a copy of the list as-is.
        classify = word_list[:]
    else:
        # One vector per burst word: whether it occurs in each text of the window.
        vecs_kw = []
        for j in range(len(word_list)):
            vec_kw = np.array([
                word_list[j] in splitword[m]
                for m in range(num, num + count_times[i])
            ])
            vecs_kw.append(vec_kw.astype(int))
        # compute AP (Affinity Propagation)
        # ap = AffinityPropagation(preference=-26).fit(vecs_kw)
        # cluster_centers_indices = ap.cluster_centers_indices_    # indices of the predicted cluster centers, e.g. [123,23,34]
コード例 #5
0
def bulid_initial_char_prob(message):
    """Count how often each initial character occurs among the items of ``message``.

    Despite the name, the result holds raw counts, not normalized
    probabilities.

    :param message: iterable of indexable items (e.g. words); empty items
        are ignored
    :returns: Counter mapping the first element of each non-empty item to
        the number of items starting with it
    """
    # Empty items have no first character; skip them instead of raising
    # IndexError on word[0].
    return Counter(word[0] for word in message if word)
コード例 #6
0
ファイル: handlers.py プロジェクト: CSCfi/metadata-submitter
    async def submit(self, req: Request) -> Response:
        """Handle a submission.xml that describes actions for the uploaded files.

        The submission file is parsed first; its actions are grouped by the
        schema they refer to. Every other uploaded file is then processed with
        each action declared for its schema type, in declaration order.

        :param req: Multipart POST request with submission.xml and files
        :raises: HTTPBadRequest if the request has no (or more than one)
                 submission.xml, an action carries no attributes, or a file has
                 no action declared for its schema type
        :returns: JSON response listing the result of every executed action
        """
        files = await _extract_xml_upload(req)
        schema_types = Counter(file[1] for file in files)
        if "submission" not in schema_types:
            reason = "There must be a submission.xml file in submission."
            LOG.error(reason)
            raise web.HTTPBadRequest(reason=reason)
        if schema_types["submission"] > 1:
            reason = "You should submit only one submission.xml file."
            LOG.error(reason)
            raise web.HTTPBadRequest(reason=reason)
        # Pick the submission document by its schema type instead of assuming
        # it is the first uploaded file.
        submission_xml = next(file[0] for file in files if file[1] == "submission")
        submission_json = XMLToJSONParser().parse("submission", submission_xml)

        # Check what actions should be performed, collect them per schema.
        # Every schema maps to a flat list of actions, so any number of
        # actions for one schema is handled uniformly.
        actions: Dict[str, List] = {}
        for action_set in submission_json["actions"]["action"]:
            for action, attr in action_set.items():
                if not attr:
                    # Kept on a single line: the text is used as the HTTP
                    # reason phrase, which must not contain line breaks.
                    reason = (
                        "You also need to provide necessary information for submission action. "
                        f"Now {action} was provided without any extra information."
                    )
                    LOG.error(reason)
                    raise web.HTTPBadRequest(reason=reason)
                LOG.debug(f"submission has action {action}")
                actions.setdefault(attr["schema"], []).append(action)

        # Go through parsed files and do the actual action
        results: List[Dict] = []
        db_client = req.app["db_client"]
        for file in files:
            content_xml = file[0]
            schema_type = file[1]
            if schema_type == "submission":
                LOG.debug("file has schema of submission type, continuing ...")
                continue  # No need to use submission xml
            if schema_type not in actions:
                # Report a client error instead of failing with a KeyError.
                reason = f"No action was provided in submission.xml for file with schema {schema_type}."
                LOG.error(reason)
                raise web.HTTPBadRequest(reason=reason)
            for action in actions[schema_type]:
                result = await self._execute_action(schema_type, content_xml, db_client, action)
                results.append(result)

        body = json.dumps(results)
        LOG.info(f"Processed a submission of {len(results)} actions.")
        return web.Response(body=body, status=200, content_type="application/json")