import docx

className = '16计算机1'
name = "周易"
num = '16211160127'
# filename = input()

doc = docx.Document('tres.docx')
for table in doc.tables:
    cell = table.rows[1].cells
    cell[1].text = className
    cell[3].text = name
    cell[5].text = num
doc.save('tres.docx')
##----- CONFIGURATION END -----##
import os
import re

import docx

strPath = os.getcwd()
# strPath = 'Z:\\1. Micro Evaluation\\VELOCITY\\paragraph_predict\\XPSR\\test\\test2\\wrong'
translist = list()

### --- Directory Crawl --- ###
for file in os.listdir(strPath):
    if file.endswith('.docx'):
        doc_id = re.split(r'_|\s', os.path.splitext(file)[0])[0]
        print("_________\ncreated document_id: {} for: \n{}".format(doc_id, file))
        doc = docx.Document(file)
        sublesscounter = 0
        for i in range(len(doc.paragraphs)):
            headline = ''
            lesson_id = None
            what = ''
            based = ''
            lesson = ''
            # init_word_1 is expected to be defined in the configuration
            # section above; paragraphs starting with it open a new lesson.
            if re.match(r'^%s\b' % init_word_1, doc.paragraphs[i].text):
                sublesscounter += 1
                lesson_id = str(doc_id + '-' + str(sublesscounter))
                print('_________\ninit.DEBUG:----> ', i)
# importing libraries
import re
import requests
import bs4
import docx
# import pandas as pd
import oxford as ox

# importing text
doc = docx.Document('test.docx')
a = doc.paragraphs[0].text

# extracting words
pattern = '([A-Za-z]{4,})'
w_dict = re.findall(pattern, a)

# formatting: collect up to 40 words with their word form and first definition
c = 0
r_dict = {}
for i in w_dict:
    if len(r_dict) < 40:
        try:
            ox.Word.get(i)
        except Exception:
            continue
        wordform = ox.Word.wordform()
        try:
            definition = ox.Word.definitions()[0]
            r_dict.update({i: [wordform, definition]})
        except Exception:
            continue
def meaning(self):
    if os.path.isfile(self.path):
        doc = docx.Document(self.path)
    else:
        doc = docx.Document()
        doc.add_heading("Definitions", 0)
    sections = doc.sections
    for section in sections:
        section.top_margin = Cm(1.5)
        section.bottom_margin = Cm(1)
        section.left_margin = Cm(1.5)
        section.right_margin = Cm(1.5)
    url = 'https://www.lexico.com/en/definition/' + self.word
    page = requests.get(url)
    data = page.content
    if page.status_code == 200:
        soup = BeautifulSoup(data, 'html.parser')
        sect = soup.find_all("section", "gramb")
        for i in range(len(sect)):
            typeOfWord = sect[i].find("span", "pos")
            typeOfWord = typeOfWord.text.title()
            mean = sect[i].find("span", "ind")
            try:
                example = sect[i].find("div", "ex")
                example = example.text
            except AttributeError:
                example = 'N.A'
            try:
                syn1 = sect[i].find("strong", "syn")
                synonyms = syn1.text
                syn2 = sect[i].find("span", "syn")
                synonyms = synonyms + syn2.text
                synonymList = synonyms.split(',')
                syno = ''
                # Use a separate loop variable so the outer index i is not
                # clobbered, and stop at the list length to avoid an IndexError.
                for j in range(min(5, len(synonymList))):
                    if j != 0:
                        syno = syno + ', ' + synonymList[j]
                    else:
                        syno = synonymList[j]
            except AttributeError:
                syno = 'N.A'
            para = (self.word.capitalize() + ' : ' + mean.text + '\n' +
                    'Example : ' + example + '\n' + 'Synonyms : ' + syno)
            font = doc.styles['Normal'].font
            font.name = 'Calibri'
            font.size = Pt(14)
            doc.add_paragraph(para)
        doc.save(self.path)
        return 0
    else:
        print('Oops!! The website may be under maintenance or may have moved to a new address (URL).')
#!/usr/bin/python3
import docx

file_path = "./text_files/python_word_letter.docx"
word_document = docx.Document(file_path)
print(word_document.paragraphs)
title = word_document.paragraphs[0]
title.style = word_document.styles["Heading 1"]
for p in word_document.paragraphs:
    print(p.text)
word_document.save(file_path)
def post(self, request, *args, **kwargs):
    user_id = request.headers.get('userid')
    try:
        user_obj = CustomUser.objects.get(id=user_id)
    except CustomUser.DoesNotExist:
        return Response({"status": "401", "message": messages.UNAUTHORIZED},
                        status=status.HTTP_401_UNAUTHORIZED)
    permission_object = Permission.objects.get(name="Import Questions")
    permission_list = [permission.id for permission in user_obj.permission.all()]
    is_permission = permission_object.id in permission_list
    if is_permission:
        try:
            serializer = ImportQuestionSerializer(data=request.data)
            if serializer.is_valid():
                dataset = Dataset()
                uploaded_file = request.FILES['question_file']
                if uploaded_file.name.endswith('docx'):
                    import docx
                    doc = docx.Document(uploaded_file)
                    paras = [p.text for p in doc.paragraphs if p.text]
                    print("paras", paras, type(paras))
                    # imported_data = dataset.load(uploaded_file.read(), format='docx')
                if uploaded_file.name.endswith('xlsx'):
                    imported_data = dataset.load(uploaded_file.read(), format='xlsx')
                    for data in imported_data:
                        tmpOptionsList = ['A', 'B', 'C', 'D', 'E']
                        if data[11] not in tmpOptionsList:
                            return Response(
                                {"status": "400", "message": messages.MISMATCHOPTIONANSWER},
                                status=status.HTTP_400_BAD_REQUEST)
                        if Question.objects.filter(
                                question_type__name=data[0],
                                subject__name=data[2],
                                topic__name=data[3],
                                sub_group__sub_group=data[1],
                                question=data[4],
                                created_by=user_obj.id).exists():
                            # The question already exists, so update it in place.
                            topic_obj = Topic.objects.get(name=data[3])
                            exam_type_obj = ExamType.objects.get(name=data[0])
                            if data[1] is not None:
                                sub_group_obj = SubGroup.objects.get(
                                    sub_group=data[1], created_by=user_obj.id)
                                subject_obj = Subject.objects.get(
                                    sub_group_id=sub_group_obj, name=data[2])
                                Question.objects.filter(
                                    question=data[4], created_by=user_obj.id).update(
                                        created_by=user_obj.id,
                                        question_type=exam_type_obj.id,
                                        subject=subject_obj,
                                        topic=topic_obj,
                                        sub_group=sub_group_obj,
                                        option_A=data[5], option_B=data[6],
                                        option_C=data[7], option_D=data[8],
                                        option_E=data[9], marks=data[10],
                                        answer=data[11])
                            else:
                                subject_obj = Subject.objects.get(name=data[2])
                                Question.objects.filter(
                                    question=data[4], created_by=user_obj.id).update(
                                        created_by=user_obj.id,
                                        question_type=exam_type_obj.id,
                                        subject=subject_obj,
                                        topic=topic_obj,
                                        option_A=data[5], option_B=data[6],
                                        option_C=data[7], option_D=data[8],
                                        option_E=data[9], marks=data[10],
                                        answer=data[11])
                        else:
                            # New question: build a payload and create it via the serializer.
                            topic_obj = Topic.objects.get(name=data[3])
                            exam_type_obj = ExamType.objects.get(name=data[0])
                            if data[1] is not None:
                                sub_group_obj = SubGroup.objects.get(
                                    sub_group=data[1], created_by=user_obj.id)
                                subject_obj = Subject.objects.get(
                                    sub_group_id=sub_group_obj, name=data[2])
                                data1 = {
                                    'created_by': user_obj.id,
                                    'question_type': exam_type_obj.id,
                                    'subject': subject_obj.id,
                                    'topic': topic_obj.id,
                                    'sub_group': sub_group_obj.id,
                                    'question': data[4],
                                    'option_A': data[5], 'option_B': data[6],
                                    'option_C': data[7], 'option_D': data[8],
                                    'option_E': data[9], 'marks': data[10],
                                    'answer': data[11],
                                }
                            else:
                                subject_obj = Subject.objects.get(name=data[2])
                                data1 = {
                                    'created_by': user_obj.id,
                                    'question_type': exam_type_obj.id,
                                    'subject': subject_obj.id,
                                    'topic': topic_obj.id,
                                    'question': data[4],
                                    'option_A': data[5], 'option_B': data[6],
                                    'option_C': data[7], 'option_D': data[8],
                                    'option_E': data[9], 'marks': data[10],
                                    'answer': data[11],
                                }
                            serializer1 = QuestionSerializer(data=data1)
                            if serializer1.is_valid():
                                serializer1.save()
                            else:
                                return Response(
                                    {"status": "400", "message": serializer1.errors},
                                    status=status.HTTP_400_BAD_REQUEST)
                    return Response(
                        {"status": "200",
                         "message": messages.CREATED,
                         "data": {"file_path": "/media/excel_files/questions_import.xlsx",
                                  "file_name": "questions_import.xlsx"}},
                        status=status.HTTP_200_OK)
                return Response({"status": "400", "message": "unsupported file type"},
                                status=status.HTTP_400_BAD_REQUEST)
            return Response({"status": "400", "message": serializer.errors},
                            status=status.HTTP_400_BAD_REQUEST)
        except Exception as error:
            return Response({"status": "400", "message": str(error)},
                            status=status.HTTP_400_BAD_REQUEST)
    return Response({"status": "401", "message": messages.UNAUTHORIZED},
                    status=status.HTTP_401_UNAUTHORIZED)
import docx

def getText(filename):
    doc = docx.Document(filename)
    fullText = []
    for p in doc.paragraphs:
        fullText.append(p.text)
    return '\n'.join(fullText)
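For reference, a minimal usage sketch of getText; the file name 'example.docx' is a placeholder, not from the original:

# Hypothetical usage: 'example.docx' is an assumed file in the working directory.
text = getText('example.docx')
print(text)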
import docx
import save_data
import os

path = r'.\data_all'
save_path = r'.\save_data'
files = os.listdir(path)
if not os.path.exists(save_path):
    os.makedirs(save_path)
save_data.delete(save_path)
for file in files:
    if not os.path.isdir(file):
        doc = docx.Document(path + '/' + file)
        for para in doc.paragraphs:
            data = para.text
            if len(data) > 20:
                r1, r2, r3 = save_data.match(data)
                file_new = file.replace('.docx', '.csv')
                save_data.save(r1, r2, r3, save_path + '/' + file_new)
def generar_informe(request, informe_de, parametros, tipo):
    """Generate reports about some person/object.

    informe_de -> Report subject; can be: empleado, cliente, administrador,
                  vehiculo, etc.
    parametros -> Values/constraints for the report; can be: everything,
                  last month, last week, last year, or even discarded records.
    tipo -> Output format of the report; can be: excel, pdf, csv, word.

    The data flows from CSV to XLSX, then to DOCX, and is finally converted
    to PDF; each previous step is required.
    """
    tablas_db = apps.all_models['Mantenedor']
    informes = {
        # 'NAME_IN_HTML_FORM': tablas_db['ORACLE_TABLE_NAME'],
        'empleado': tablas_db['empleado'],
        'cliente': tablas_db['cliente'],
        'proveedor': tablas_db['proveedor'],
        'administrador': tablas_db['perfil'],
        'vehiculo': tablas_db['infoauto'],
    }
    # Abbreviation, extension, and content type for each supported format.
    tipos_admitidos = {
        'csv': ['csv', 'text/csv'],
        'excel': ['xlsx',
                  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
        'word': ['docx',
                 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
        'pdf': ['pdf', 'application/pdf'],
    }
    # Activate Spanish date translation.
    activate('es')
    # Get the month name in Spanish.
    today = datetime.date.today()
    mes = date(today, 'F')
    # The current date is captured in two forms:
    # one for file names, the other to be written inside the reports.
    now = datetime.datetime.now().strftime('%Y-%m-%d__%H_%M_%S')
    now_ = datetime.datetime.now().strftime(f'%d de {mes} de %Y, %H:%M %p')
    # Normalize the input.
    tipo = tipo.lower()
    # Validate the format.
    if tipo not in ['csv', 'excel', 'word', 'pdf']:
        return HttpResponse('ERROR, el tipo de formato no es válido!')
    # Build the file name.
    nombre_archivo = f'informe_{informe_de}_{now}'
    nombre_archivo_con_extension = f'informe_{informe_de}_{now}.{tipos_admitidos[tipo][0]}'
    # Set the response type and headers.
    response = HttpResponse(
        content_type=f'{tipos_admitidos[tipo][1]}',
        headers={'Content-Disposition':
                 f'attachment; filename="{nombre_archivo_con_extension}"'},
    )
    # Get the column titles of the table.
    fields = informes[informe_de]._meta.get_fields()
    titulos = list()
    for titulo in fields:
        try:
            titulo.field
        except AttributeError:
            titulos.append(titulo.name)
    writer = csv.writer(response)
    writer.writerow(titulos)
    # Fetch the data from the database.
    nombre_campos = informes[informe_de]._meta.get_fields()
    for fila in informes[informe_de].objects.all():
        temp = list()
        for columna in nombre_campos:
            try:
                temp.append(fila.serializable_value(columna.name))
            except AttributeError:
                pass
        writer.writerow(temp)
    # Return a CSV file.
    if tipo == 'csv':
        return response
    # Location of the temporary files.
    temp_folder = f'{os.path.realpath(".")}\\__temp\\'
    temp_csv = f'{temp_folder}__temp.csv'
    # Make sure the temporary folder exists.
    if not os.path.exists(temp_folder):
        os.mkdir(temp_folder)
    # Write the CSV to disk.
    temp = open(temp_csv, 'wb')
    temp.write(response.content)
    temp.close()
    # Return an XLSX file.
    if tipo == 'excel':
        # Pandas reads the CSV from disk.
        archivo_leido = pandas.read_csv(temp_csv)
        # Convert to Excel and store as an XLSX file.
        archivo_leido.to_excel(f'{temp_folder}{nombre_archivo_con_extension}',
                               index=None, header=True,
                               sheet_name=f'{informe_de}')
        return FileResponse(
            open(f'{temp_folder}{nombre_archivo_con_extension}', 'rb'))
    # Create and fill a DOCX file.
    document = docx.Document()
    document.add_heading(f'Informe de {informe_de}', 0)
    document.add_paragraph(f'Con fecha {now_}.')
    with open(temp_csv, newline='') as f:
        csv_reader = csv.reader(f)
        csv_headers = next(csv_reader)
        csv_cols = len(csv_headers)
        table = document.add_table(rows=2, cols=csv_cols)
        hdr_cells = table.rows[0].cells
        for i in range(csv_cols):
            hdr_cells[i].text = csv_headers[i]
        for row in csv_reader:
            row_cells = table.add_row().cells
            for i in range(csv_cols):
                try:
                    row_cells[i].text = row[i]
                except IndexError:
                    pass
    document.add_page_break()
    document.save(f'{temp_folder}{nombre_archivo}.docx')
    # Return a DOCX file.
    if tipo == 'word':
        return FileResponse(open(f'{temp_folder}{nombre_archivo}.docx', 'rb'))
    if tipo == 'pdf':
        try:
            # Option 1: use MS Office 365.
            print('\nUsando Office 365\n')
            docx2pdf.convert(f'__temp\\{nombre_archivo}.docx')
        except Exception:
            import subprocess
            # Option 2: use LibreOffice.
            print('\nUsando LibreOffice\n')
            path_to_soffice_exe = r'"C:\Program Files\LibreOffice\program\soffice.exe"'
            to_pdf = '-headless -convert-to pdf'
            outdir = r'-outdir .\__temp'
            res = subprocess.run(
                f'{path_to_soffice_exe} {to_pdf} {outdir} "__temp\\{nombre_archivo}.docx"')
            print(f'\n\n{res}\n\n')
        return FileResponse(open(f'{temp_folder}{nombre_archivo}.pdf', 'rb'))
    else:
        return HttpResponse('Error con el servidor...')
def extract_keywords():
    f_raw = docx.Document(FilePool.raw_docx2)
    for para in f_raw.paragraphs:
        # A paragraph whose last run is bold is treated as a keyword.
        if len(para.runs) != 0 and para.runs[-1].bold:
            keyword = para.text.strip()
            print(keyword)
import docx

docFileObj = docx.Document('Pen.docx')
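The snippet above only opens the document; a typical next step, sketched here under the assumption that 'Pen.docx' contains plain paragraphs, is to iterate over them:

# Print the text of every paragraph in the opened document.
for paragraph in docFileObj.paragraphs:
    print(paragraph.text)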
def run(self):
    new_files = []
    files_detection_error_encoding = []
    files_detection_error_text_type = []
    files_detection_error_lang = []

    if self.file_paths:
        len_file_paths = len(self.file_paths)
        for i, file_path in enumerate(self.file_paths):
            self.progress_updated.emit(
                self.tr(f'Opening files ... ({i + 1}/{len_file_paths})'))

            default_dir = wl_checking_misc.check_dir(
                self.main.settings_custom['import']['temp_files']['default_path'])
            default_encoding = self.main.settings_custom['import']['temp_files']['default_encoding']

            file_path = wl_misc.get_normalized_path(file_path)
            file_name, file_ext = os.path.splitext(os.path.basename(file_path))
            file_ext = file_ext.lower()

            # Text files
            if file_ext == '.txt':
                (new_file,
                 detection_success_encoding,
                 detection_success_text_type,
                 detection_success_lang) = self.main.wl_files._new_file(file_path)

                new_files.append(new_file)

                if not detection_success_encoding:
                    files_detection_error_encoding.append(new_file['path'])
                if not detection_success_text_type:
                    files_detection_error_text_type.append(new_file['path'])
                if not detection_success_lang:
                    files_detection_error_lang.append(new_file['path'])
            else:
                if file_ext in ['.docx', '.xlsx', '.xls']:
                    new_path = wl_checking_misc.check_new_path(
                        os.path.join(default_dir, f'{file_name}.txt'))

                    # Word documents
                    if file_ext == '.docx':
                        with open(new_path, 'w', encoding=default_encoding) as f:
                            doc = docx.Document(file_path)
                            for block in self.iter_block_items(doc):
                                if type(block) == docx.text.paragraph.Paragraph:
                                    f.write(f'{block.text}\n')
                                elif type(block) == docx.table.Table:
                                    for row in self.iter_visual_cells(block):
                                        cells = []
                                        for cell in row:
                                            cells.append(' '.join([
                                                item.text
                                                for item in self.iter_cell_items(cell)]))
                                        f.write('\t'.join(cells) + '\n')
                    # Excel workbooks
                    elif file_ext == '.xlsx':
                        with open(new_path, 'w', encoding=default_encoding) as f:
                            workbook = openpyxl.load_workbook(file_path, data_only=True)
                            for worksheet_name in workbook.sheetnames:
                                worksheet = workbook[worksheet_name]
                                for row in worksheet.rows:
                                    f.write('\t'.join([
                                        (cell.value if cell.value is not None else '')
                                        for cell in row]) + '\n')
                    elif file_ext == '.xls':
                        with open(new_path, 'w', encoding=default_encoding) as f:
                            workbook = xlrd.open_workbook(file_path)
                            for i_sheet in range(workbook.nsheets):
                                worksheet = workbook.sheet_by_index(i_sheet)
                                for row in range(worksheet.nrows):
                                    f.write('\t'.join([
                                        worksheet.cell_value(row, col)
                                        for col in range(worksheet.ncols)]) + '\n')

                    new_paths = [new_path]
                else:
                    # Detect encoding
                    if self.main.settings_custom['files']['auto_detection_settings']['detect_encodings']:
                        encoding_code, _ = wl_detection.detect_encoding(self.main, file_path)
                    else:
                        encoding_code = self.main.settings_custom['auto_detection']['default_settings']['default_encoding']

                    # CSV files
                    if file_ext == '.csv':
                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))
                        with open(new_path, 'w', encoding=default_encoding) as f:
                            with open(file_path, 'r', newline='', encoding=encoding_code) as f_csv:
                                csv_reader = csv.reader(f_csv)
                                for row in csv_reader:
                                    f.write('\t'.join(row) + '\n')
                        new_paths = [new_path]
                    # HTML files
                    elif file_ext in ['.htm', '.html']:
                        with open(file_path, 'r', encoding=encoding_code) as f:
                            soup = bs4.BeautifulSoup(f.read(), 'lxml')
                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))
                        with open(new_path, 'w', encoding=default_encoding) as f:
                            f.write(soup.get_text())
                        new_paths = [new_path]
                    # XML files
                    elif file_ext == '.xml':
                        with open(file_path, 'r', encoding=encoding_code) as f:
                            xml_text = f.read()
                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))
                        with open(new_path, 'w', encoding=default_encoding) as f:
                            f.write(xml_text)
                        new_paths = [new_path]
                    # Translation memory files
                    elif file_ext == '.tmx':
                        lines_src = []
                        lines_target = []
                        with open(file_path, 'r', encoding=encoding_code) as f:
                            soup = bs4.BeautifulSoup(f.read(), 'lxml-xml')
                            for tu in soup.find_all('tu'):
                                seg_src, seg_target = tu.find_all('seg')
                                lines_src.append(seg_src.get_text())
                                lines_target.append(seg_target.get_text())
                        path_src = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}_source.txt'))
                        path_target = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}_target.txt'))
                        with open(path_src, 'w', encoding=default_encoding) as f:
                            f.write('\n'.join(lines_src))
                            f.write('\n')
                        with open(path_target, 'w', encoding=default_encoding) as f:
                            f.write('\n'.join(lines_target))
                            f.write('\n')
                        new_paths = [path_src, path_target]
                    # Lyrics files
                    elif file_ext == '.lrc':
                        lyrics = {}
                        with open(file_path, 'r', encoding=encoding_code) as f:
                            for line in f:
                                time_tags = []
                                line = line.strip()
                                # Strip time tags
                                while re.search(r'^\[[^\]]+?\]', line):
                                    time_tags.append(
                                        re.search(r'^\[[^\]]+?\]', line).group())
                                    line = line[len(time_tags[-1]):].strip()
                                # Strip word time tags
                                line = re.sub(r'<[^>]+?>', r'', line)
                                line = re.sub(r'\s{2,}', r' ', line).strip()
                                for time_tag in time_tags:
                                    if re.search(r'^\[[0-9]{2}:[0-5][0-9]\.[0-9]{2}\]$', time_tag):
                                        lyrics[time_tag] = line
                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))
                        with open(new_path, 'w', encoding=default_encoding) as f:
                            # Use a distinct loop variable so the lyrics dict
                            # is not shadowed while it is being written out.
                            for _, lyric in sorted(lyrics.items()):
                                f.write(f'{lyric}\n')
                        new_paths = [new_path]

                for new_path in new_paths:
                    (new_file,
                     detection_success_encoding,
                     detection_success_text_type,
                     detection_success_lang) = self.main.wl_files._new_file(new_path, txt=False)

                    new_files.append(new_file)

                    if not detection_success_encoding:
                        files_detection_error_encoding.append(new_file['path'])
                    if not detection_success_text_type:
                        files_detection_error_text_type.append(new_file['path'])
                    if not detection_success_lang:
                        files_detection_error_lang.append(new_file['path'])

        self.main.settings_custom['import']['files']['default_path'] = \
            wl_misc.get_normalized_dir(self.file_paths[0])

    self.progress_updated.emit(self.tr('Updating table ...'))
    time.sleep(0.1)
    self.worker_done.emit(new_files,
                          files_detection_error_encoding,
                          files_detection_error_text_type,
                          files_detection_error_lang)
import docx

def Save_File_Docx(content, file_name):
    doc = docx.Document()
    doc.add_paragraph(content)
    doc.save(file_name + ' Standard.docx')
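A hedged usage example for Save_File_Docx; the content string and base name below are placeholders:

# Hypothetical call; writes 'report Standard.docx' to the working directory.
Save_File_Docx('Quarterly summary text goes here.', 'report')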
def read_doc(self, path):  # path or file-like object
    x = docx.Document(path)
    return x
# xml_string = source_document._body._element.xml
# xml = parseString(xml_string)
# contents = xml.getElementsByTagName('w:bookmarkStart')
# for i in contents:
#     print(i)

import docx

file_name = r'C:\Users\thameem.sakkarai\Desktop\check\AOF edited document.docx'
# docx.Document() takes the path as its first positional argument.
document = docx.Document(file_name)
# sections is a collection; take the first section to reach its header.
section = document.sections[0]
header = section.header
print(header)
print(dir(document))
help(document)

core_properties = document.core_properties
print(core_properties.author)
print(core_properties.created)
print(core_properties.last_modified_by)
print(core_properties.last_printed)
print(core_properties.modified)
print(core_properties.revision)
print(core_properties.title)
print(core_properties.category)
import docx
from docx.shared import RGBColor, Pt

# Get text from the fake message
fake_text = docx.Document('fakeMessage.docx')
fake_list = []
for paragraph in fake_text.paragraphs:
    fake_list.append(paragraph.text)

# Get text from the real message
real_text = docx.Document('realMessage.docx')
real_list = []
for paragraph in real_text.paragraphs:
    if len(paragraph.text) != 0:
        real_list.append(paragraph.text)

# Get the template for style, font, etc.
doc = docx.Document('template.docx')
doc.add_heading('Ilsaf Nabiullin', 0)
subtitle = doc.add_heading('Nks1ckk & co', 1)
subtitle.alignment = 1
doc.add_heading('', 1)
doc.add_paragraph('2 Jan 2021')
doc.add_paragraph('')

def set_spacing(paragraph):
    """Set space between paragraphs."""
def classify(file_name):
    file = docx.Document(file_name)
    print("Paragraph count: " + str(len(file.paragraphs)))
    file_word = docx.Document()
    information = ''
    for para in file.paragraphs:
        information = information + para.text
    # print(information)
    # information = docx.Document("E:/DMY important/段铭杨毕业设计/A-GCNN/测试文档.docx")
    # information = "项目经验人力资源000000管理系统项目时间:2019年01月-2019年02月项目简介: 使用技术及语言:SSM框架/Java 开发工具:Eclipse/STS 开发人员: 独立完成 项目描述:管理员对部门、员工、职位、培训、招聘、奖惩、考勤的CRUD;游客浏览招聘,对自己简历的CRUD,投递;员工查看自己的信息、培训、公司通讯录、奖惩、薪资和打卡。本项目结构上分为表现层、业务层和数据访问层,层次间的依赖关系自下到上。业务层封装业务流程,为适应业务的变更,每一业务模块均有专门的接口及实现类。 项目业绩: 项目收获:只是离现在最近的小项目,花费半个多月的时间完成。功能繁琐,但我没有放弃,虚心请教一些大佬,在此我也由衷感谢他们。对于代码的编写,一定要字斟句酌,一个不起眼的问题,就会导致系统BUG。程序员也是在代码的海洋里磨炼起来的!"
    a_cut = jieba.cut(information)
    a = '/'.join(a_cut)
    after_cut = a.split('/')
    # Three passes, because removing items while iterating skips elements.
    for i in range(3):
        for i in after_cut:
            if len(i) > 2:
                after_cut.remove(i)
    final_cut = []
    for item in after_cut:
        if item != ' ' and item != ' ' and item != '':
            final_cut.append(item)
    # Denoise: load the Chinese stop-word list
    file_stop = r'./简历分类测试/dmy_stopwords.txt'
    stop = []
    standard_stop = []
    final_information = []
    with open(file_stop, 'r', encoding='utf-8-sig') as f:
        lines = f.readlines()
        for line in lines:
            lline = line.strip()
            stop.append(lline)
    for i in range(0, len(stop)):
        for word in stop[i].split():
            standard_stop.append(word)
    for i in final_cut:
        if i not in standard_stop:
            final_information.append(i)
    # Vectorize the resulting text
    model_1 = gensim.models.Word2Vec.load(path)
    X = []
    sentence = []
    word_vec = []
    zero_array = [0] * 256
    word_vec = np.zeros([256, ], dtype=np.float64)
    for j in range(len(final_information)):
        word_vec = list(model_1[final_information[j]])
        sentence.append(word_vec)
    for j in range(128 - len(final_information)):
        sentence.append(zero_array)
    X.append(sentence)
    X = np.array(X)
    predict = model.predict(X)
    print(predict)
    # predict holds the final classification scores as an array; the label
    # with the highest score is the predicted class.
    if predict[0][0] > predict[0][1] and predict[0][0] > predict[0][2] and \
            predict[0][0] > predict[0][3] and predict[0][0] > predict[0][4]:
        return 'JAVA工程师'
    if predict[0][1] > predict[0][2] and predict[0][1] > predict[0][3] and \
            predict[0][1] > predict[0][4] and predict[0][1] > predict[0][0]:
        return '技术总监'
    if predict[0][2] > predict[0][3] and predict[0][2] > predict[0][4] and \
            predict[0][2] > predict[0][1] and predict[0][2] > predict[0][0]:
        return 'Web工程师'
    # Fixed: the original compared `[0][3]` instead of `predict[0][3]`.
    if predict[0][3] > predict[0][4] and predict[0][3] > predict[0][2] and \
            predict[0][3] > predict[0][1] and predict[0][3] > predict[0][0]:
        return '大数据工程师'
    if predict[0][4] > predict[0][3] and predict[0][4] > predict[0][2] and \
            predict[0][4] > predict[0][0] and predict[0][4] > predict[0][1]:
        return '算法工程师'
def gerarRecomendacao():
    # Inside this function we create an empty .docx file that will receive the
    # recommendation data for the samples; a different file is saved for each
    # sample at the end of each loop iteration.
    doc = docx.Document()
    # The for loop walks the data sheet row by row.
    for row in dadosData:
        # Skip the first row of the sheet (the header); from the second row
        # onwards, read the data of each sample.
        if str(row[0]) != str('Amostra'):
            # For each row (each row is a different soil sample) we fetch the
            # values by their index, convert them to the proper data types,
            # and store them under more familiar names.
            # amx is the file name under which the recommendation is saved by
            # doc.save() inside the loop.
            amx = str(row[1]) + '.docx'
            am = str(row[1])
            pro = str(row[2])
            k = float(row[5])
            na = float(row[6])
            ca = float(row[7])
            mg = float(row[8])
            al = float(row[9])
            alh = float(row[10])
            v1 = float(row[11])
            x = float(row[12])
            ta = str(row[13])
            pf = float(row[14])
            li = float(row[15])
            el = float(row[16])
            fc = float(row[17])
            lbc = float(row[18])
            cbc = float(row[19])
            base_menor = float(row[20])
            base_maior = float(row[21])

            # Conversions and adjustments of the values read from the sheet.
            # Convert K and Na from mg/dm³ to cmolc/dm³.
            k = k / 390
            na = na / 230
            # Potential CEC
            ctctotal = ca + mg + k + na + alh
            # Effective CEC
            ctcef = ca + mg + k + na + al
            # Sum of bases
            sb = ca + mg + k + na
            # Base saturation
            v2 = sb / ctctotal * 100
            # Aluminium saturation
            m = al / ctcef * 100
            # Number of plants per hectare
            numplant = 10000 / (el * li)
            # Calculations for furrow application of limestone
            vol_de_sulco = ((((base_maior + base_menor) * (pf / 100) / 2) *
                             (100 / el * 100)) * 1000)
            perc_vol_de_sulco = vol_de_sulco / (1000 * 1000 * pf / 10)
            # The liming requirement is computed by three different methods:
            # Method 1 - sum of bases
            ncm1 = ((ctctotal * (v1 - v2)) / 100)
            # Method 2 - sum of the calcium and magnesium ideal for the crop
            ncm2 = (x - (ca + mg))
            # Method 3 - H + Al content
            ncm3 = alh
            # This conditional decides which method gives the appropriate
            # liming requirement, based on the recommendations of ..... et al ()
            if ncm1 >= ncm2:
                nc = ncm1
            elif ncm2 <= ncm3:
                nc = ncm2
            else:
                nc = ncm3
            # qc is the limestone quantity, adjusted by application depth,
            # for whole-area application.
            qc = nc * pf / 20
            # The if/elif/else below converts the liming requirement (now qc)
            # into the limestone quantity needed for each application method
            # (strip, furrow, planting hole, and whole area), and generates a
            # recommendation for each sample.

            # Recommendation for strip application
            if ta == 'fx':
                # Convert qc to the value applied per plant, in g of limestone/plant.
                qc = qc * (1000000 / numplant) * fc / 100
                # Recommendation text
                doc.add_paragraph('Prognóstico Para Calagem', 'Title')
                doc.add_paragraph('Amostra ' + am)
                doc.add_paragraph('Produtor: ' + pro)
                doc.add_heading('Quantidade de calcário', 1)
                p = doc.add_paragraph(
                    'Para a corrigir o pH do solo e elevar os teores de Ca e Mg indica-se preparar e '
                    'aplicar %.2f g de calcário/planta' % qc)
                p.alignment = 3
                doc.save(amx)
                # After the report is saved (line above), a new empty .docx is
                # created and used for the next recommendation.
                doc = docx.Document()
            # Recommendation for furrow application
            elif ta == 'sc':
                # Convert qc to the value applied in the furrow, in g of limestone/metre of furrow.
                qc = (qc * perc_vol_de_sulco) / (100 / el * 100) * 1000000
                # Recommendation text
                doc.add_paragraph('Prognóstico Para Calagem', 'Title')
                doc.add_paragraph('Amostra ' + am)
                doc.add_paragraph('Produtor - ' + pro)
                doc.add_heading('Quantidade de calcário', 1)
                p = doc.add_paragraph(
                    'Para a corrigir o pH do solo e elevar os teores de Ca e Mg indica-se preparar e '
                    'aplicar %.2f g de calcário/metro de suco' % qc)
                p.alignment = 3
                doc.save(amx)
                # A new empty .docx is created for the next recommendation.
                doc = docx.Document()
            # Recommendation for planting-hole application
            elif ta == 'cv':
                # Convert qc to the value applied per hole, in g of limestone/hole.
                qc = qc * 100 * lbc * cbc
                # Recommendation text
                doc.add_paragraph('Prognóstico Para Calagem', 'Title')
                doc.add_paragraph('Amostra ' + am)
                doc.add_paragraph('Produtor - ' + pro)
                doc.add_heading('Quantidade de calcário', 1)
                p = doc.add_paragraph(
                    'Para a corrigir o pH do solo e elevar os teores de Ca e Mg indica-se '
                    'aplicar %.2f g de calcário/cova' % qc)
                p.alignment = 3
                doc.save(amx)
                # A new empty .docx is created for the next recommendation.
                doc = docx.Document()
            # Recommendation for whole-area application
            else:
                qc = nc
                doc.add_paragraph('Prognóstico Para Calagem', 'Title')
                doc.add_paragraph('Amostra ' + am)
                doc.add_paragraph('Produtor - ' + pro)
                doc.add_heading('Quantidade de calcário', 1)
                p = doc.add_paragraph(
                    'Para a corrigir o pH do solo e elevar os teores de Ca e Mg indica-se '
                    'aplicar %.2f t de calcário/ha' % qc)
                p.alignment = 3
                doc.save(amx)
                # A new empty .docx is created for the next recommendation.
                doc = docx.Document()

            if ca >= 0.4 or al > 0.5 or m > 30:
                print('Essa amostra precisa de Gessagem')
import docx

def get_text(filename):
    doc = docx.Document(filename)
    fullText = [para.text for para in doc.paragraphs]
    return '\n'.join(fullText)
import docx

def read_docx(file_name):
    doc = docx.Document(file_name)
    content = '\n'.join([para.text for para in doc.paragraphs])
    return content
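For illustration, read_docx could be exercised like this; the file name is an assumption, not from the original:

# Hypothetical usage: print the full text of a document named 'notes.docx'.
print(read_docx('notes.docx'))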
def creator():
    try:
        inv_num = input("Enter Invoice Number: ")
        location = input("Enter Location: ")
        lot_num = 0
        window_amt = input("Enter amt of windows: ")
        window_price_per = 22
        sliders = input("Enter amt of sliders: ")
        slider_price_per = 40
        doors = input("Enter amt of doors: ")
        doors_price_per = 35
        lgDoors = input("Enter amt of large sliders: ")
        lgDoor_price_per = 20
        lgWin = input("Enter amt of large windows: ")
        lgWin_price_per = 11
        price_of_trip = input("Enter Price of Trip: ")
        desc = input("Enter description of services if applicable: ")
        total = ((int(window_amt) * int(window_price_per)) +
                 (int(lgDoors) * int(lgDoor_price_per)) +
                 (int(sliders) * int(slider_price_per)) +
                 (int(doors) * int(doors_price_per)) +
                 (int(lgWin) * int(lgWin_price_per)) +
                 int(price_of_trip))
        document = docx.Document()
        document.add_heading(f' Invoice #{inv_num}', 0)
        p = document.add_paragraph("""
        Chad J Willes Construction
        371 west 500 south
        Lehi UT, 84043
        801-706-8523
        """)
        document.add_paragraph("""
        To: Alside Exterior Building Products
        915 West 2610 South
        Salt Lake City Ut 84119
        Att: Andrew Germaine
        """)
        main_p = document.add_paragraph("")
        main_p.add_run(
            'This invoice is for all the work needed to install windows to the Alside Standard'
        ).bold = True
        main_p.add_run(f"\n {location}")
        main_p.add_run(f"\nLot: {lot_num}")
        main_p.add_run(f"\nWindows @${window_price_per} per x{window_amt}")
        main_p.add_run(f"\nSliders @${slider_price_per} per x{sliders}")
        main_p.add_run(f"\nDoors @${doors_price_per} per x{doors}")
        main_p.add_run(f"\nLarge Windows @${lgWin_price_per} per x{lgWin}")
        main_p.add_run(f"\nLarge Sliders @${lgDoor_price_per} per x{lgDoors}")
        main_p.add_run(f"\nTrip Price: ${price_of_trip}")
        main_p.add_run(f"\n{desc}")
        main_p.add_run(f"\nTOTAL ${total}").underline = True
        try:
            document.save('C:/Users/Owner/Documents/Invoices/' +
                          f'Invoice for Alside {inv_num} {location}.docx')
            print("\n Invoice Created!")
            # message = MIMEMultipart()
            # mail_content = '''
            # Invoice For Windows
            # '''
            # message.attach(MIMEText(mail_content, 'plain'))
            # message['Subject'] = "Invoice"
            # attach_file_name = f'C:/Users/terra/Documents/Invoices/Invoice for Alside {inv_num} {location}.docx'
            # attach_file = open(attach_file_name, 'rb')  # Open the file in binary mode
            # payload = MIMEBase('application', 'octate-stream')
            # payload.set_payload((attach_file).read())
            # encoders.encode_base64(payload)  # encode the attachment
            # # add payload header with filename
            # payload.add_header('Content-Disposition', 'attachment', filename='Invoice1.docx')
            # message.attach(payload)
            # r = requests.post(
            #     "https://api.mailgun.net/v3/sandbox2a89baf534dc4c6a88ecc092c09a65e8.mailgun.org/messages.mime",
            #     auth=("api", "9077b561bf5dbb09635c5372373e3182-f696beb4-2fb9e654"),
            #     data={"from": "*****@*****.**", "to": "*****@*****.**"},
            #     files={"message": bytes(str(message), "utf-8")})
            creator()
        except Exception as e:
            print(f"\nInvoice Already Exists, Try Again. Error: {e}")
            creator()
    except Exception as e:
        print(f"\n Program Broke for some reason. Try Again. \n Error Code for Ethon: {e}")
        creator()
        ['LDAP 인젝션', 'LI', 'X'],
        ['운영체제 명령 실행', 'OC', 'X'],
        ['SQL 인젝션', 'SI', 'X'],
        ['SSI 인젝션', 'SS', 'X'],
        ['Xpath 인젝션', 'XI', 'X'],
        ['디렉터리 인덱싱', 'DI', 'O'],
        ['정보 누출', 'IL', 'O'],
        ['악성 콘텐츠', 'CS', 'X'],
        ['크로스사이트 스크립팅', 'XS', 'O'],
        ['약한 문자열 강도', 'BF', 'X'],
        ['불충분한 인증', 'IA', 'X'],
        ['취약한 패스워드 복구', 'PR', 'X'],
        ['크로스사이트 리퀘스트 변조(CSRF)', 'CF', 'X'],
        ['세션 예측', 'SE', 'X'],
        ['불충분한 인가', 'IN', 'X'],
        ['불충분한 세션 만료', 'SC', 'X'],
        ['세션 고정', 'SF', 'X'],
        ['자동화 공격', 'AU', 'X'],
        ['프로세스 검증 누락', 'PV', 'O'],
        ['파일 업로드', 'FU', 'X'],
        ['관리자 페이지 노출', 'AE', 'O'],
        ['경로 추적', 'PT', 'X'],
        ['위치 공개', 'PL', 'O'],
        ['데이터 평문 전송', 'SN', 'X'],
        ['쿠키변조', 'CC', 'O']]

try:
    doc = docx.Document('test3.docx')
except Exception as e:
    print("파일이 존재하지 않는듯? -> " + str(e))  # "the file doesn't seem to exist"

# Test helper functions.
def table_info(table_num, row_num):
    # Not used in production; call this to inspect a specific table.
    # table_num = index of the table / row_num = row number
    print("*" * 30)
    print('줄수 : ' + str(len(doc.tables[table_num].rows)))      # row count
    print('칸수 : ' + str(len(doc.tables[table_num].columns)))   # column count
    print("*" * 30)
    for num in range(len(doc.tables[table_num].row_cells(row_num))):
        print(doc.tables[table_num].row_cells(row_num)[num].text.replace(
            '\n', ' '))  # NOTE: the source is truncated here; these replace() arguments are an assumed completion
def renumber_refs(inp, output, refs_input=None, refs_output='new_refs.xlsx', start=1):
    if refs_input is None:
        if exists(inp[:-4] + 'xlsx'):
            refs_input = inp[:-4] + 'xlsx'
    if refs_input:
        lit = pd.read_excel(refs_input)
        lit.n = lit.n.astype(dtype=np.dtype(int))
        lit.set_index(keys='n', inplace=True)
    doc = docx.Document(inp)
    new_n = {}
    do_analysis = True
    # First pass: collect every cited reference number in reading order.
    for p in paragraph_iterator(doc):  # doc.paragraphs
        if stop_tag.search(p.text):
            do_analysis = False
        if continue_tag.search(p.text):
            do_analysis = True
        if not do_analysis:
            continue
        for ss in t.findall(p.text):
            for sss in unpack_ref(ss)[1]:
                if refs_input and (not (int(sss) in lit.index)):
                    new_n[sss] = 0
                    lit.loc[int(sss)] = 'Not found'
                    continue
                if sss not in new_n:
                    new_n[sss] = start
                    start += 1
    r = list(map(int, new_n.keys()))
    if refs_input:
        lit = lit.loc[r]
        lit['new_n'] = list(new_n.values())
        lit = fix_dublicates(lit)
    else:
        lit = pd.DataFrame(data={
            'n': list(new_n.keys()),
            'new_n': list(new_n.values()),
            'title': list(new_n.keys()),
        })
        lit.set_index(keys='n', inplace=True)
    print(lit)
    # Second pass: rewrite the citations with their new numbers.
    doc = docx.Document(inp)
    do_replacement = True
    for p in paragraph_iterator(doc):
        if stop_tag.search(p.text):
            do_replacement = False
        if continue_tag.search(p.text):
            do_replacement = True
        if not do_replacement:
            continue
        ss = t.search(p.text)
        while ss:
            l = ss[1]
            new_text = pack_ref(lit.loc[map(int, unpack_ref(l)[1])]['new_n'])
            if new_text == '':
                print(ss[1], '-> removing')
                replace_text_in_runs(p.runs, ss.start(), ss.end(), new_text)
            else:
                print(ss[1], '->', new_text)
                replace_text_in_runs(p.runs, ss.span(1)[0], ss.span(1)[1], new_text)
            ss = t.search(p.text, pos=ss.span(1)[1] + 1)
    doc.save(output)
    lit.drop_duplicates(subset='new_n', inplace=True)
    lit['n'] = lit['new_n']
    lit = lit.loc[lit.title != 'Not found']
    lit.to_excel(refs_output, columns=['n', 'title'], index=False)
import docx
import xlwt

docxPath2 = r'D:\act选房namecn.docx'
# docxPath = 'D:\hjzb数据库字典_简化.docx'
# docxPath = r'D:\act选房.docx'  # the r prefix means a raw (non-escaped) string
docxPath = r'D:\act在线选房全.docx'
# docxPath = r'D:\华发在线售楼处.doc'

row = 0        # row counter
tableSum = 0   # number of tables seen

workbook = xlwt.Workbook(encoding='utf-8')
worksheet = workbook.add_sheet("sheet1")
style = xlwt.easyxf('pattern: pattern solid, fore_colour ice_blue')

# Create an object for an existing Word document.
file2 = docx.Document(docxPath2)
# Create an object for another existing Word document.
file = docx.Document(docxPath)

# Read and print the content of each paragraph.
# for it in file.paragraphs:
#     print(it.text)
paragraphsList = [it.text for it in file.paragraphs if len(it.text) > 0]
print("Paragraph count: " + str(len(file.paragraphs)))
print("Table count: " + str(len(file.tables)))  # 480
print("paragraphsList count: " + str(len(paragraphsList)))
dist_table = {}
# Read the content of each table and print it.
def data(self):
    doc = docx.Document(self.doc_path)
    raw_data = []
    for para in doc.paragraphs:
        raw_data.append(para.text.strip())
    return raw_data
                    help='Style for text')
parser.add_argument('--code_style', default='Normal',
                    help='Style for code')
parser.add_argument('--print_styles', action='store_true',
                    help='Print styles available in template')
parser.add_argument('--verbose', '-v', action='store_true',
                    help='Print various information')
args = parser.parse_args()

if args.template is not None:
    d = docx.Document(args.template)
else:
    d = docx.Document()

if args.print_styles:
    print('=================')
    print('Styles available:')
    for s in d.styles:
        print(s.name)
    print('=================')
    exit(0)

d = modify_document(d, args.input, args)
d.save(args.output)
    rev = Review(reviewAuthor, reviewPosition, reviewCompany,
                 reviewRating, reviewDict, days)
    print("Review created for %s..." % rev.name[0])
    sys.stdout.flush()
    return rev

# Create array of Review objects and populate with our reviews
reviewGuide = []
for num in range(reviewNum):
    reviewGuide.append(findMaterials(links[num]))

# Sort our list based on date posted
# reviewGuideSorted = sorted(reviewGuide, key=attrgetter('day'), reverse=True)

# Create document and insert main heading
doc = docx.Document()
doc.add_heading('Trust Radius Weekly Report', 0)

# Func createPage:
#   page (param): an instance of a Review object
def createPage(page):
    # Insert review info
    doc.add_heading(page.name, 1)
    doc.add_heading("%s at %s" % (page.position[0], page.company[0]), 3)
    doc.add_heading(page.day.strftime('%B %d, %Y'), 3)
    doc.add_heading(page.rating + ' out of 10 stars', 3)
    # Insert review text
    for x, y in page.goodies.items():
        doc.add_heading(x, 4)
        doc.add_paragraph(y)
# Read and write Word documents.
import docx

# Document object for an existing file.
d = docx.Document('docx_files/demo.docx')
# List of paragraph objects.
print(d.paragraphs)
# One paragraph object.
print(d.paragraphs[0])
# Text of that paragraph object.
print(d.paragraphs[0].text)
print(d.paragraphs[1].text)
# Save the second paragraph object to a variable.
p = d.paragraphs[1]
# List of run objects (a new run starts whenever there is a change in style).
# p should have 4.
print(p.runs)
# Run objects also have a text member.
print(p.runs[0].text)
print(p.runs[1].text)
print(p.runs[2].text)
print(p.runs[3].text)
# Can also check it
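Runs can also be restyled in place. A minimal sketch, assuming the second paragraph really has at least one run; the output file name is an assumption:

# Make the first run of the paragraph bold and save a copy of the document.
p.runs[0].bold = True
d.save('docx_files/demo_styled.docx')  # hypothetical output path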
import docx

def addruns(filename, runs):
    # Append each run to the document, one new paragraph per run.
    doc = docx.Document(filename)
    for r in runs:
        doc.add_paragraph(' ').add_run(r)
    doc.save(filename)
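A usage sketch for addruns, assuming 'log.docx' already exists (docx.Document() raises an error for a missing path); the file name and entries are placeholders:

# Hypothetical usage: append three runs, each in its own paragraph.
addruns('log.docx', ['first entry', 'second entry', 'third entry'])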
'''
Al Sweigart
Robocop

Write a program that would generate a Word document with custom invitations
that look like Figure 13-11. You can download a sample guests.txt file from
http://nostarch.com/automatestuff/.
'''
import docx

guestFile = open('guests.txt', 'r')
guestStr = guestFile.read()
guestLst = guestStr.split('\n')
guestFile.close()

doc = docx.Document('invitations_empty.docx')
for guest in guestLst:
    doc.add_paragraph('It would be a pleasure to have the company of',
                      style='InvText')
    doc.add_paragraph(guest, style='InvName')
    doc.add_paragraph('At 111010 Memory Lane on the Evening of',
                      style='InvText')
    doc.add_paragraph('April 1st', style='InvDate')
    doc.add_paragraph('at 7 o\'clock', style='InvText')
    doc.add_page_break()
doc.save('invitations.docx')