Python analysis Examples

Programming Language: Python

Namespace/Package Name: core

Method/Function: analysis

Examples at hotexamples.com: 4

Python analysis - 4 examples found. These are the top-rated real-world Python examples of core.analysis extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: Main.py Project: qwerttqq95/376_Special

    def start_to_connect(self):
        """Serve the accepted client socket until it closes or an error occurs.

        Polls ``self.tctimeClient`` with ``select``, hex-encodes every frame
        received, hands it to ``core.analysis`` and acts on the returned
        ``(code, payload)`` pair:

        * ``0`` -- the payload is sent back to the client as hex-decoded
          bytes and the UI widgets are switched to the connected state;
        * ``1`` / ``3`` -- the payload is forwarded through ``_signal_warm``
          together with the code.

        Any exception is appended to ``bug.txt`` and ends the loop.

        Returns:
            ``0`` when the push button already shows the online label,
            otherwise ``None`` once the receive loop has ended.
        """
        buffsize = 2048
        # A short blocking timeout instead of 0 keeps the loop responsive
        # without spinning a CPU core while the socket is idle.
        poll_timeout = 0.05
        if self.ui.pushButton.text() == '已上线':
            return 0
        try:
            print("Connection from :", self.add)
            self._signal_text.emit("Connection from :" + self.add[0])
            while True:
                try:
                    readable, _, exceptional = select.select(
                        [self.tctimeClient], [], [self.tctimeClient],
                        poll_timeout)
                    if self.tctimeClient in readable:
                        raw = self.tctimeClient.recv(buffsize)
                        if not raw:
                            # An empty read on a readable socket means the
                            # peer closed the connection; stop polling it.
                            break
                        data = Comm.makestr(str(binascii.b2a_hex(raw))[2:-1])
                        if data is not None and data != '':
                            print('Received message:', data)
                            self._signal_text.emit('Received message:' + data)
                            message = core.analysis(data.replace(' ', ''))
                            print('adssss', message)
                            if message is None:
                                continue
                            code = message[0]
                            if code == 0:
                                reply = message[1]
                                print('Send message:', Comm.makestr(reply))
                                self._signal_text.emit(
                                    'Send message:' + Comm.makestr(reply))
                                self._signal_warm.emit((1, '登录/心跳'))
                                self.ui.pushButton.setText('已上线')
                                self.ui.menubar.setDisabled(False)
                                self.ui.lineEdit_2.setDisabled(True)
                                self.tctimeClient.send(binascii.a2b_hex(reply))
                            elif code == 1:
                                self._signal_warm.emit((1, message[1]))
                            elif code == 3:
                                self._signal_warm.emit((3, message[1]))
                    if self.tctimeClient in exceptional:
                        break
                except Exception:
                    # Close the log file deterministically instead of leaking
                    # one open handle per failure.
                    with open('bug.txt', 'a+') as log_file:
                        traceback.print_exc(file=log_file)
                    break
        except Exception:
            self._signal_warm.emit((0, '端口被占用'))
            with open('bug.txt', 'a+') as log_file:
                traceback.print_exc(file=log_file)

Example #2

Show file

def on_loop(project_id):
    """Run the analysis pipeline over the latest document tasks of a project.

    Tasks are fetched with ``get_new_doc_task_db(project_id, 'docx')`` and
    limited to the last ``config.n_for_project_in_loop`` rows.  Each task is
    then downloaded, converted with ``core.transform``, analysed with
    ``core.analysis``, its extracted images are uploaded and registered as
    attachments, the document record is filled in, and tags are created and
    linked.  A failure in any stage is reported through ``analysis_log`` and
    the loop moves on to the next task.

    Values written to the task's ``step`` field by this function:
        2 -- all stages succeeded; 4 -- file larger than the size limit;
        5 -- file type skipped; 6 -- ``fileUrl`` is not an http(s) URL;
        7 -- ``core.analysis`` (or keyword filtering) failed.

    Args:
        project_id: Project identifier, forwarded as ``projid`` to the
            persistence helpers.
    """
    docdata = get_new_doc_task_db(project_id, 'docx')
    if len(docdata) == 0:
        return

    docdata = docdata.tail(config.n_for_project_in_loop)
    # Lower-case the first letter of every column name.
    docdata.columns = [s[0].lower() + s[1:] for s in docdata.columns]
    docdata = docdata.dropna(subset=['fileUrl', 'step']).reset_index()

    basepath = config.root_dir
    imgdir = os.path.join(config.root_dir, 'images')
    max_file_size = 300 * 1000 * 1000  # bytes
    summarylimit = 3
    nwlimit = 900

    for _, dt in docdata.iterrows():
        dt['createTime'] = str(dt['createTime'].asm8)
        print(datetime.now())
        info_log_obj = {'id': dt['fileId'], 'name': dt['name']}

        if not dt['fileUrl'].startswith('http'):
            dt['step'] = 6
            change_step(dt['id'], dt.to_dict(), projid=project_id)
            analysis_log('无文件', info_log_obj)
            continue

        # Some file types are deliberately not analysed.
        file_type = dt['fileType']
        if any(not file_type or tp in file_type
               for tp in config.skip_file_types):
            dt['step'] = 5
            change_step(dt['id'], dt.to_dict(), projid=project_id)
            info_log_obj['type'] = file_type
            analysis_log('跳过类型', info_log_obj)
            continue

        # Download the file into the local working folder.
        try:
            curpath = os.path.join(basepath, dt['name'])
            download_doc(dt['fileUrl'], curpath)
        except Exception as e:
            analysis_log('下载文件', info_log_obj)
            print(e)
            continue

        # Convert the file.
        try:
            # Very large files are marked and skipped.
            if os.path.getsize(curpath) > max_file_size:
                analysis_log('文件过大', info_log_obj)
                dt['step'] = 4
                change_step(dt['id'], dt.to_dict(), projid=project_id)
                analysis_log('完成', info_log_obj)
                continue

            extname = os.path.splitext(dt['name'])[1]
            core.transform(curpath, basepath, extname)
        except Exception as e:
            analysis_log('转换文件', info_log_obj)
            print(e)
            continue

        # Analyse the document into fields.
        try:
            (kwords, kwfreq, pharr, nwarr, sumarr, attaimges,
             *_drawings) = core.analysis(
                curpath, extname, imgdir=imgdir, do_drawings=True)

            real_kwords = [kw for kw in kwords.split(',') if is_real_kw(kw)]
            # Everything after the first five accepted keywords is what gets
            # stored in the record's "keyWord" field below.
            low_kw = real_kwords[5:]
        except Exception as e:
            dt['step'] = 7
            change_step(dt['id'], dt.to_dict(), projid=project_id)
            analysis_log('分析成字段', info_log_obj)
            print(e)
            continue

        # Image attachments.
        try:
            # Upload the extracted images, then register each one in the
            # attachment table.
            upload_result = core.upload_images(attaimges)
            for atta in upload_result:
                atta_obj = {
                    "name": atta['name'],
                    "remark": "",
                    "keyword": "",
                    "abstract": utils.remove_blank(atta['abstract']),
                    "url": atta['url'],
                    "fileSize": atta['fileSize'],
                    "fileType": atta['fileType'],
                    "newWords": "",
                    "wordFrequency": "",
                    "phrases": "",
                    "linkType": "文件关联图片",
                    "fileId": dt['fileId']
                }
                add_attachment(atta_obj, projid=project_id)
        except Exception as e:
            print(e)
            analysis_log('图片附件', info_log_obj)
            continue

        # Write the extracted fields into the document record.
        try:
            doc_record = get_docs_byid(dt['fileId'], projid=project_id)

            # Keep at most `summarylimit` summaries, preferring the longest.
            real_summary = [su for su in sumarr if is_real_summary(su)]
            if len(real_summary) > summarylimit:
                real_summary = sorted(
                    real_summary, key=len, reverse=True)[:summarylimit]

            nwarr = utils.remove_blank(nwarr)[:nwlimit]
            doc_record.update({
                "keyWord": ','.join(low_kw),
                "abstract": ','.join(real_summary),
                "newWords": nwarr,
                "wordFrequency": kwfreq,
                "phrases": pharr
            })
            fill_docinfo(doc_record['id'], doc_record, projid=project_id)
        except Exception as e:
            analysis_log('文件表填入', info_log_obj)
            print(e)
            continue

        # Create missing tags and link them to the file.
        try:
            if not real_kwords:
                analysis_log('无内容', info_log_obj)
            else:
                alltags = get_doctag(projid=project_id)
                dtrels = []
                for curtag in real_kwords[:config.web_keywords_num]:
                    wanted = str(curtag).upper()
                    # Tag names are matched case-insensitively.
                    for t in alltags:
                        if str(t['name']).upper() == wanted:
                            tagid = t['id']
                            break
                    else:
                        tagid = create_doctag(curtag, projid=project_id)
                    dtrels.append((dt['fileId'], tagid))
                # Persist the file/tag relations.
                create_doctagrel(dtrels, projid=project_id)
        except Exception as e:
            analysis_log('标签', info_log_obj)
            print(e)
            continue

        # Every stage above succeeded (failures `continue` earlier), so the
        # task can be marked as finished.
        dt['step'] = 2
        change_step(dt['id'], dt.to_dict(), projid=project_id)

        # TODO: delete the locally downloaded file (not implemented).
        analysis_log('完成', info_log_obj)

    print('end proj')

Example #3

Show file

            if not transformed:
                shutil.copy(fpath, filedir)

# Manual switch for the analysis pass below; it is skipped while this is False.
reanalysis = False
if reanalysis:
    print('analysis')
    # Accumulators: one record per analysed file, plus the images and
    # drawings reported by core.analysis.
    result = []
    imgresult = []
    # NOTE(review): nothing is appended to drawingresult in the lines visible
    # here -- the excerpt looks truncated; confirm against the full file.
    drawingresult = []
    for indx, fullname in enumerate(fname_arr):
        print(fullname)
        # Split "name.ext" into the stem and the extension (with its dot).
        ext_tuple = os.path.splitext(fullname)
        fname = ext_tuple[0]
        extname = ext_tuple[1]
        fpath = os.path.join(filedir, fullname)
        # core.analysis is unpacked into exactly seven values here.
        kwords, kwfreq, pharr, nwarr, sumarr, curimg, curdrawing = core.analysis(
            fpath, extname, imgdir)
        # Record ids are the enumeration index offset by 100.
        fid = indx + 100
        result.append({
            'id': fid,
            'fname': fname,
            'extname': extname,
            'username': username,
            'keywords': kwords,
            'kwfreq': kwfreq,
            'phrase': pharr,
            'newwords': nwarr,
            'summary': sumarr
        })
        imgresult += curimg
        # Tag every drawing with the id of the file it came from.
        for d in curdrawing:
            d['drawing_id'] = fid

Example #4

Show file

File: analysislocal.py Project: pengyang486868/PY-read-Document

def on_loop(project_id, basepath=r'E:\file-local-analysis'):
    """Analyse a project's step-1 document tasks from locally reachable files.

    Tasks come from ``get_documenttask``; only rows whose ``step`` equals 1
    are kept, limited to the last ``config.n_for_project_in_loop`` rows.
    Each task's ``fileUrl`` is used directly as a file path.  Depending on
    the extension the file is converted (doc, ppt) or copied (dwg, rar, zip)
    into ``basepath`` before ``core.analysis`` runs.  The extracted fields
    are then written to the document record and tags are created and linked.
    A failure in any stage is reported through ``analysis_log`` and the loop
    moves on to the next task.

    ``step`` is set to 2 when a task completes, when the file exceeds the
    size limit, and when analysis fails (so the task is not retried forever).

    Args:
        project_id: Project identifier, forwarded as ``projid`` to the
            persistence helpers.
        basepath: Local working directory that receives converted or copied
            files.  Defaults to the directory that was previously
            hard-coded in this function.
    """
    docresponse = get_documenttask(projid=project_id)
    docdata = pd.DataFrame(docresponse)

    if len(docdata) == 0:
        return

    docdata = docdata[docdata['step'] == 1]
    docdata = docdata.tail(config.n_for_project_in_loop)
    docdata = docdata.dropna(subset=['fileUrl', 'step']).reset_index()

    max_file_size = 100 * 1000 * 1000  # bytes
    summarylimit = 3
    nwlimit = 900

    for _, dt in docdata.iterrows():
        info_log_obj = {'id': dt['fileId'], 'name': dt['name']}
        print()
        analysis_log('开始', info_log_obj)

        try:
            curpath = dt['fileUrl']
            extname = os.path.splitext(dt['name'])[1]
            local_copy = os.path.join(basepath, dt['name'])

            if extname == '.doc':
                transdoc.doc2docx(curpath, basepath, remove=False)
                curpath = local_copy
            elif extname == '.ppt':
                transppt.ppt2pptx(curpath, basepath, remove=False)
                curpath = local_copy
            elif extname in ('.dwg', '.rar', '.zip'):
                # dwg / archive files are moved locally first; the online
                # analysis variant does not need this.
                shutil.copy(curpath, basepath)
                curpath = local_copy

            # Very large files are marked as done and skipped.
            if os.path.getsize(dt['fileUrl']) > max_file_size:
                analysis_log('文件过大', info_log_obj)
                dt['step'] = 2
                change_step(dt['id'], dt.to_dict(), projid=project_id)
                continue
        except Exception as e:
            analysis_log('下载和转换文件', info_log_obj)
            print(e)
            continue

        # Analyse the document into fields.
        try:
            kwords, kwfreq, pharr, nwarr, sumarr, *_images = core.analysis(
                curpath, extname, imgdir=None, do_drawings=True)

            real_kwords = [kw for kw in kwords.split(',') if is_real_kw(kw)]
            # Everything after the first five accepted keywords is what gets
            # stored in the record's "keyWord" field below.
            low_kw = real_kwords[5:]
        except Exception as e:
            analysis_log('分析成字段', info_log_obj)
            print(e)

            # Mark the task as done anyway so it does not fail on every run.
            dt['step'] = 2
            change_step(dt['id'], dt.to_dict(), projid=project_id)
            continue

        # Write the extracted fields into the document record.
        try:
            doc_record = get_docs_byid(dt['fileId'], projid=project_id)

            # Keep at most `summarylimit` summaries, preferring the longest.
            real_summary = [su for su in sumarr if is_real_summary(su)]
            if len(real_summary) > summarylimit:
                real_summary = sorted(
                    real_summary, key=len, reverse=True)[:summarylimit]

            nwarr = utils.remove_blank(nwarr)[:nwlimit]
            doc_record.update({
                "keyWord": ','.join(low_kw),
                "abstract": ','.join(real_summary),
                "newWords": nwarr,
                "wordFrequency": kwfreq,
                "phrases": pharr
            })
            fill_docinfo(doc_record['id'], doc_record, projid=project_id)
        except Exception as e:
            analysis_log('文件表填入', info_log_obj)
            print(e)
            continue

        # Create missing tags and link them to the file.
        try:
            if not real_kwords:
                analysis_log('无内容', info_log_obj)
            else:
                alltags = get_doctag(projid=project_id)
                dtrels = []
                for curtag in real_kwords[:config.web_keywords_num]:
                    wanted = str(curtag).upper()
                    # Tag names are matched case-insensitively.
                    for t in alltags:
                        if str(t['name']).upper() == wanted:
                            tagid = t['id']
                            break
                    else:
                        tagid = create_doctag(curtag, projid=project_id)
                    dtrels.append((dt['fileId'], tagid))
                # Persist the file/tag relations.
                create_doctagrel(dtrels, projid=project_id)
        except Exception as e:
            analysis_log('标签', info_log_obj)
            print(e)
            continue

        # Every stage above succeeded (failures `continue` earlier), so the
        # task can be marked as finished.
        dt['step'] = 2
        change_step(dt['id'], dt.to_dict(), projid=project_id)

        # TODO: delete the locally downloaded file (not implemented).
        analysis_log('完成', info_log_obj)

    print('end proj')