예제 #1
1
def reportAddendum(reportFile, listofOutputsTobeAdded, listOfComments=None):
    """
    Append a "PostProcessing" section with figures to an existing .docx report.

    The new content is added after the current end of the document and the
    result is saved as ``<reportFile without .docx>_pp.docx``.

    Args:
        reportFile: path of the report to extend; must end with ".docx".
        listofOutputsTobeAdded: picture paths, inserted in the given order.
        listOfComments: optional captions matched to the pictures by position.
            A picture without a matching entry gets no caption text.

    Raises:
        Warning: if ``reportFile`` does not end with ".docx".
    """
    extension = ".docx"
    # Only the trailing extension is relevant; a ".docx" appearing elsewhere in
    # the path (e.g. in a directory name) must neither validate nor be stripped.
    if not reportFile.endswith(extension):
        raise Warning(" problem with the format of the file")
    document = Document(reportFile)
    paragraph_styles = [s for s in document.styles if s.type == WD_STYLE_TYPE.PARAGRAPH]

    # NOTE(review): relies on the order of the document's paragraph styles
    # (index 1 for the header, index 0 for the text) -- confirm against the template.
    document.add_paragraph("PostProcessing", style=paragraph_styles[1])
    postProcessingText = document.add_paragraph(style=paragraph_styles[0])
    postProcessingText.add_run("\rWord format postprocessing generated through the ")
    postProcessingText.add_run("etumos ").bold = True
    postProcessingText.add_run("coupling tool\r\r")

    for ind, picture in enumerate(listofOutputsTobeAdded):
        document.add_picture(picture, width=Cm(7.00))
        # add_picture appends a new paragraph; centre that one.
        document.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.CENTER
        caption = document.add_paragraph("\r")
        caption.add_run("Figure n. " + str(ind) + ":   ").bold = True
        # Tolerate a comment list that is shorter than the picture list.
        if listOfComments is not None and ind < len(listOfComments):
            caption.add_run(str(listOfComments[ind]))
        caption.add_run(" \r \r")
        caption.alignment = WD_ALIGN_PARAGRAPH.CENTER

    filenameToBeSaved = reportFile[:-len(extension)] + "_pp.docx"
    document.save(filenameToBeSaved)
    return None
예제 #2
0
파일: views.py 프로젝트: StuJ/rua
def create_review_form(submission, review_form):
    """Build a .docx version of a review form and return the saved file path.

    One section is written per form element, in the order given by the
    relationship's ``order`` field: free-text style fields get a heading with
    a blank line to fill in, 'select' and 'check' fields get a two-row table
    whose first row lists the choices.

    Args:
        submission: object whose ``title`` becomes the document title.
        review_form: form used to filter ``FormElementsRelationship`` rows.

    Returns:
        Path of the generated file under ``<BASE_DIR>/files/forms``.
    """
    document = Document()
    document.add_heading(submission.title, 0)
    document.add_paragraph('You should complete this form and then use the review page to upload it.')
    relations = models.FormElementsRelationship.objects.filter(form=review_form).order_by('order')
    for relation in relations:
        field_type = relation.element.field_type

        if field_type in ['text', 'textarea', 'date', 'email']:
            document.add_heading(relation.element.name+": _______________________________", level=1)
            # Italic is a run property; assigning it on the paragraph object
            # (as before) silently did nothing.
            document.add_paragraph().add_run(relation.help_text).italic = True

        if field_type in ['select', 'check']:
            document.add_heading(relation.element.name, level=1)
            if field_type == 'select':
                choices = render_choices(relation.element.choices)
            else:
                choices = ['Y', 'N']

            p = document.add_paragraph(relation.help_text)
            p.add_run(' Mark your choice however you like, as long as it is clear.').italic = True
            # Row 0 holds the choice labels, row 1 is left empty for the mark.
            table = document.add_table(rows=2, cols=len(choices))
            hdr_cells = table.rows[0].cells
            for i, choice in enumerate(choices):
                hdr_cells[i].text = choice[0]
            table.style = 'TableGrid'

    document.add_page_break()
    forms_dir = os.path.join(settings.BASE_DIR, 'files', 'forms')
    if not os.path.exists(forms_dir):
        os.makedirs(forms_dir)
    path = os.path.join(forms_dir, '%s.docx' % str(uuid4()))

    document.save(path)
    return path
예제 #3
0
 def WriteTiezi(self):
     """Dump the collected pages (text and pictures) into a .docx file.

     Reads ``self.pages``, ``self.page_num``, ``self.img``, ``self.dirname``
     and ``self.url``; the output name is built from the last two components
     of the URL and the file is written into ``self.dirname``.
     """
     if len(self.pages) == 0:
         print('Error!')
     document = Document()
     for page_index in range(self.page_num):
         for post in self.pages[page_index]:
             for piece in post:
                 document.add_paragraph(piece.decode())
             # Pictures are looked up by the first element of the post.
             if len(post) > 1 and post[0] in self.img:
                 for img_ref in self.img[post[0].decode()]:
                     document.add_picture(self.dirname + '/' + self.getImg(img_ref))
             document.add_paragraph('---------------------------------')
     url_parts = self.url.strip().split('/')
     name = url_parts[-2] + url_parts[-1]
     document.save(self.dirname + '/' + name + '.docx')
     print("Success to dump into " + name + '.docx')
예제 #4
0
def save2Word():
    """Merge the comment text into the main content and export it as a .docx.

    ``./txt/comment.txt`` is appended to ``./txt/mainContent.txt``; the merged
    file is then read line by line.  Image URLs (http, ending in .jpg/.png/.gif)
    are removed from each line and the pictures are embedded instead, the
    remaining text becomes a paragraph.  The document is saved as
    ``./word/<title>.docx`` where ``title`` is a module-level name.
    """
    file_name = './word/'+title+'.docx'
    image_url_pattern = r'(http:.*?\.jpg|http:.*?\.png|http:.*?\.gif)'

    with open('./txt/mainContent.txt','a') as merged, open('./txt/comment.txt','rb') as comments:
        merged.write(comments.read())

    document = Document()
    with open('./txt/mainContent.txt','rb') as article_complete:
        for line in article_complete:
            # Manual line breaks cannot be handled properly yet, so lines of
            # these lengths (and bare newlines) are skipped.
            if len(line) in (7, 13) or line == '\n':
                continue

            img_url = re.findall(image_url_pattern, line, re.S)
            line = re.sub(image_url_pattern, '', line)
            for img in img_url:
                imgName = saveImg(img)
                embedded = False
                if imgName != 0:
                    try:
                        document.add_picture('./img/'+imgName, width=Inches(3.25))
                        embedded = True
                    except Exception:
                        embedded = False
                if not embedded:
                    # Leave a placeholder plus the original URL in the text.
                    document.add_paragraph(u'图片加载失败....')
                    document.add_paragraph(img)
            document.add_paragraph(line.decode('UTF-8'))

        document.save(file_name.decode('UTF-8'))
예제 #5
0
class DocXRenderer(mistune.Renderer):
    """mistune renderer that writes block elements into a python-docx document.

    Lists, headers and paragraphs are appended to ``self.doc`` as a side
    effect; those hooks return an empty string.
    """

    def __init__(self, *largs, **kargs):
        self.doc = Document()
        self.clist = None
        super(DocXRenderer, self).__init__(*largs, **kargs)

    def list_item(self, text):
        """Return the item text terminated by a newline (split again in ``list``)."""
        return "{0}\n".format(text)

    def list(self, body, ordered=True):
        """Add one styled paragraph per newline-separated item of ``body``."""
        style = "ListNumber3" if ordered else "ListBullet"
        for entry in body.rstrip().split("\n"):
            self.doc.add_paragraph(entry, style)
        return ''

    def header(self, text, level, raw=None):
        """Add ``text`` as a heading of the given level."""
        self.doc.add_heading(text, level)
        return ''

    def paragraph(self, text):
        """Add ``text`` as a paragraph with no explicit style."""
        self.doc.add_paragraph(text, None)
        return ''
예제 #6
0
def convert_pdf(path='provide path here', format='text', codec='utf-8'):
    """Extract bullet points from a PDF and write them as JSON into a .docx.

    The text of up to 500 pages is extracted, every run of text following a
    bullet character is collected, and the result is written (pretty-printed
    JSON under the key ``bulletins``) to ``json_data.docx`` in the current
    directory.  Where ``os.startfile`` is available the file is then opened.

    Args:
        path: path of the PDF file to read.
        format: extraction format; only ``'text'`` is supported.
        codec: text encoding used for the extracted text.

    Returns:
        An empty string.

    Raises:
        ValueError: if ``format`` is not ``'text'``.
    """
    # Validate before acquiring any resource.
    if format != 'text':
        raise ValueError('Please provide the format to extract')

    rsrcmgr = PDFResourceManager()
    retstr = BytesIO()
    device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=LAParams())
    try:
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        maxpages = 500  # maximum pages to process (a large number of pages decreases performance)
        with open(path, 'rb') as fp:
            for page in PDFPage.get_pages(fp, set(), maxpages=maxpages, caching=True, check_extractable=True):
                interpreter.process_page(page)
        # Decode with the same codec the converter was asked to write.
        text = retstr.getvalue().decode(codec)
    finally:
        device.close()
        retstr.close()

    bullet_points = re.findall('•([^•]*)', text)
    # Pretty JSON with the extracted data.
    final_data = json.dumps({'bulletins': bullet_points}, indent=4, sort_keys=True)

    document = Document()
    document.add_heading('Bulletins data in the PDF')
    document.add_paragraph(final_data)
    document.save('json_data.docx')
    # os.startfile only exists on Windows; skip opening the file elsewhere.
    if hasattr(os, 'startfile'):
        os.startfile("json_data.docx")
    return ''
예제 #7
0
class XQHTMLParser(HTMLParser):
    """HTML parser that appends the parsed content to a .docx document.

    Text is accumulated in ``self.text`` and flushed as a paragraph on every
    end tag; text inside ``h<digit>`` tags is also added as a level-2 heading;
    ``img`` sources are downloaded through ``self.myclient`` and embedded.
    Text following a ``script`` start tag is not accumulated.
    """

    def __init__(self, docfile):
        HTMLParser.__init__(self)
        self.docfile = docfile
        self.doc = Document(docfile)
        self.myclient = HTMLClient()
        self.text = ''
        self.title = False
        self.isdescription = False
        self.picList = []

    def _flush_text(self):
        """Write the accumulated text (if any) as a paragraph and reset it."""
        if self.text != '':
            self.doc.add_paragraph(self.text)
        self.text = ''

    def _embed_picture(self, src):
        """Download the picture behind ``src`` and add it to the document."""
        # Everything after '!' in the URL is a suffix, not part of the address.
        picdata = self.myclient.GetPic(src.split('!')[0])
        if picdata == None:
            return
        last_segment = src.split('/')[-1]
        pictmp = last_segment.split('!')[0]
        picfix = last_segment.split('!')[-1]
        with open(pictmp, 'wb') as pic:
            pic.write(bytes(picdata))
        try:
            if picfix[0:1] == 'c':
                self.doc.add_picture(pictmp, width=Inches(4.5))
            else:
                self.doc.add_picture(pictmp)
        except docx.image.exceptions.UnexpectedEndOfFileError as e:
            print(e)
        # Remembered so that complete() can delete the temporary file.
        self.picList.append(pictmp)

    def handle_starttag(self, tag, attrs):
        self.title = bool(re.match(r'h(\d)', tag))
        self.isdescription = False
        if tag == "img":
            for variable, value in attrs:
                if variable == "src":
                    self._embed_picture(value)
        if tag == 'script':
            self.isdescription = True

    def handle_data(self, data):
        if self.title:
            self._flush_text()
            self.doc.add_heading(data, level=2)
        if not self.isdescription:
            self.text += data

    def handle_endtag(self, tag):
        self._flush_text()

    def complete(self, html):
        """Parse ``html``, save the document and remove downloaded pictures."""
        self.feed(html)
        self.doc.save(self.docfile)
        for item in self.picList:
            if os.path.exists(item):
                os.remove(item)
예제 #8
0
def create_docx(path, key, filename='pics', width=None, fontsize=9):
    """Collect the pictures in ``path`` whose name contains ``key`` into a .docx.

    The document lists the matching files first and then embeds them one
    after another in a single run.

    Args:
        path: directory to scan; it is concatenated directly with the file
            names, so it is expected to end with a separator.
        key: substring a file name must contain to be included.
        filename: output name without extension.
        width: picture width in inches; defaults to 5.5 divided by the number
            of matching files.
        fontsize: font size (pt) of the run that lists the files.
    """
    from docx import Document
    from docx.shared import Inches
    from docx.shared import Pt

    file_list = []
    for name in listdir(path):
        # Substring test; the previous "find(...) is not -1" compared identity.
        if key in name:
            print(' %s is found' % name)
            file_list.append(name)

    doc = Document()
    para = doc.add_paragraph()
    run = para.add_run()
    font = run.font
    font.name = 'Calibri'
    font.size = Pt(fontsize)

    # Avoid dividing by zero when nothing matched.
    if width is None and file_list:
        width = 5.5 / len(file_list)

    # Last three components of the directory, used as a short label.
    short_path = '/'.join(path.split('/')[-3:])
    for pic in file_list:
        run.add_text(short_path+pic+':  \n')

    para = doc.add_paragraph()
    run = para.add_run()
    for pic in file_list:
        run.add_text('  \n')
        run.add_picture(path+pic, width=Inches(width))
    # python-docx documents are written with save().
    doc.save('%s.docx' % filename)
    print(' save %s.docx' % filename)
예제 #9
0
def get_brief_name(expression, con):
    """Write the names of matching database objects into one .docx per type.

    Queries ALL_OBJECTS for objects whose name matches ``expression`` (via
    ``regexp_like``) and writes the names of views, indexes, packages,
    triggers and sequences to ViewName.docx, IndexName.docx, PackageName.docx,
    TriggerName.docx and SequenceName.docx respectively.  All five files are
    written even when a type has no match.

    Args:
        expression: regular expression matched against OBJECT_NAME.
        con: open database connection providing ``cursor()``.
    """
    # Fetch all kinds of object_name from all_objects
    find_object_info = '''select OBJECT_TYPE, OBJECT_NAME, OWNER
                            from ALL_OBJECTS where regexp_like(OBJECT_NAME ,:expression)'''
    cur = con.cursor()
    try:
        cur.execute(find_object_info, {'expression': str(expression)})
        object_info = cur.fetchall()
    finally:
        # Release the cursor even if the query fails.
        cur.close()

    # (object type, output file), in the order the files are written.
    outputs = (
        ('VIEW', 'ViewName.docx'),
        ('INDEX', 'IndexName.docx'),
        ('PACKAGE', 'PackageName.docx'),
        ('TRIGGER', 'TriggerName.docx'),
        ('SEQUENCE', 'SequenceName.docx'),
    )
    documents = dict((object_type, Document()) for object_type, _ in outputs)

    for res in object_info:
        target = documents.get(str(res[0]))
        if target is not None:
            target.add_paragraph(str(res[1]))

    for object_type, file_name in outputs:
        documents[object_type].save(file_name)
    def WriteDoc(self, song_title_list, song_lyric_dict):
        """Write song titles and lyrics into a copy of ``template.docx``.

        Each title becomes a bold 12pt 'Microsoft YaHei' run prefixed with
        TITLE_PREFIX; the first title reuses the template's first paragraph.
        ``song_lyric_dict`` maps a title to a list of lyric paragraphs, each a
        list of lines; titles without an entry only get their title line.  The
        result is saved as ``<today>.docx``.
        """
        document = Document("template.docx")
        for position, title in enumerate(song_title_list):
            if position == 0:
                title_paragraph = document.paragraphs[0]
            else:
                title_paragraph = document.add_paragraph()

            title_font = title_paragraph.add_run(TITLE_PREFIX + title).font
            title_font.bold = True
            title_font.name = 'Microsoft YaHei'
            title_font.size = doc_Pt(12)

            lyric_paragraphs = song_lyric_dict.get(title)
            if lyric_paragraphs is None:
                continue

            for lyric_lines in lyric_paragraphs:
                for lyric_line in lyric_lines:
                    line_run = document.add_paragraph().add_run(lyric_line)
                    line_run.font.size = doc_Pt(12)
                # Empty paragraph separating lyric paragraphs.
                document.add_paragraph()

        document.save(today + '.docx')
def generate_documents(recipes):
    """Write one .docx per recipe into ``output_dir``.

    Each document holds the recipe title (14pt, bold, underlined), a
    "Zutaten" section with one bullet per ingredient and a "Zubereitung"
    section with one paragraph per text, all at 13pt.  The file is named
    after the recipe title.
    """
    def add_text(doc, text, size, emphasis=(), style=None):
        # One paragraph holding a single run; `emphasis` names the font
        # attributes to switch on.
        run = doc.add_paragraph(style=style).add_run(text)
        run.font.size = Pt(size)
        for attribute in emphasis:
            setattr(run.font, attribute, True)

    create_output_dir()
    for recipe in recipes:
        doc = Document()

        add_text(doc, recipe.title.string, 14, emphasis=("bold", "underline"))

        add_text(doc, "Zutaten", 13, emphasis=("bold",))
        for ingredient in recipe.ingredients:
            add_text(doc, ingredient, 13, style="ListBullet")

        add_text(doc, "Zubereitung", 13, emphasis=("bold",))
        for text in recipe.texts:
            add_text(doc, text, 13)

        doc.save(os.path.join(output_dir, recipe.title.string + ".docx"))
예제 #12
0
 def _save_results(filename, abstracts):
     """Save (title, content) pairs to ``./result/<filename>``, one page each.

     The title is written as a level-0 heading; the content is split on
     CRLF and every piece becomes its own paragraph.
     """
     document = Document()
     for heading, body in abstracts:
         document.add_heading(heading, level=0)
         for piece in body.split("\r\n"):
             document.add_paragraph(piece)
         document.add_page_break()
     document.save("./result/" + filename)
def deployaword (self, texto, nombre):
    """Write every item of ``texto`` as a bullet into ``<nombre>.docx``."""
    document = Document()
    for item in texto:
        document.add_paragraph(str(item), style='ListBullet')
    document.save(nombre+'.docx')
    
예제 #14
0
def redact(redacters, doc):
    """Return a new document with every pattern match replaced by '[REDACTED]'.

    Args:
        redacters: iterable of regular expressions, applied in order.
        doc: source document; only the text of its paragraphs is read.

    Returns:
        A new Document holding one paragraph per source paragraph.
    """
    def scrub(text):
        # Apply every pattern in turn to the running result.
        for pattern in redacters:
            text = re.sub(pattern, '[REDACTED]', text)
        return text

    redacted = Document()
    for paragraph in doc.paragraphs:
        # NOTE(review): the text is lower-cased before matching, so the output
        # loses its original casing and patterns containing upper-case letters
        # cannot match -- confirm this is intended.
        redacted.add_paragraph(scrub(paragraph.text.lower()))
    return redacted
예제 #15
0
def writeDoc(title,content):
    """Save ``content`` under a level-0 ``title`` heading as ``<title>.docx``."""
    target = title+'.docx'
    document = Document()
    document.add_heading(title, 0)
    document.add_paragraph(content)
    document.save(target)
예제 #16
0
def get(name):
    """Create a small .docx named ``name`` in the current working directory.

    The document holds a "HEAD" heading and a "HEAD CONTENT" paragraph.  Any
    failure while building or saving the document is reported by printing
    "error"; it is not raised.
    """
    saveFile = os.getcwd()+"/"+name
    try:
        doc = Document()
        # Unicode literals: calling .decode('gbk') on these ASCII strings gave
        # the same text on Python 2 but always failed on Python 3.
        doc.add_heading(u"HEAD",0)
        doc.add_paragraph(u"HEAD CONTENT")
        doc.save(saveFile)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        print("error")
예제 #17
0
파일: host3.py 프로젝트: zhoubin50/ovs_test
 def writeresult(self, vm_mtu):
     """Run every configured command on a server/client pair and record the results.

     Raw output goes into a .docx and the extracted values into an .xlsx
     workbook; both paths come from ``self.create_file(vm_mtu)``.  Commands are
     sent to the two ``HostProcess`` workers through the command queues and
     the results are read back from the result queues.
     """
     ori_path, extract_path = self.create_file(vm_mtu)
     ori_docx = Document()
     extract_xlsx = Workbook()
     self.create_excel_sheet(extract_xlsx)
     # First spreadsheet column used by the current command's results.
     column = 1
     self.write_ori_head(ori_docx)
     s = HostProcess(CONF[self.pair], vm_mtu, self.output_q, 'server', self.s_cmd_q, self.s_result, self.s_check_q)
     c = HostProcess(CONF[self.pair], vm_mtu, self.output_q, 'client', self.c_cmd_q, self.c_result)
     s.start()
     c.start()
     run_cmd_times = 0
     for item in CONF.cmd_list:
         ori_docx.add_paragraph(item.upper(), style='Heading 4')
         s_cmd = self.create_cmd('server', item)
         c_cmd = self.create_cmd('client', item)
         for seq in range(CONF.test_times):
             self.s_cmd_q.put(s_cmd)
             # Wait until the server command has been taken off the queue.
             while not self.s_cmd_q.empty():
                 time.sleep(0.2)
             # Blocks until an item arrives on s_check_q.
             # NOTE(review): presumably the server's "ready" signal -- confirm in HostProcess.
             self.s_check_q.get()
             time.sleep(2)
             self.c_cmd_q.put(c_cmd)
             try:
                 s_result = self.s_result.get(timeout=240)
                 c_result = self.c_result.get(timeout=240)
             except:
                 # On any failure (e.g. the 240 s timeout) kill the server side
                 # and read both results again.
                 self.kill_process(CONF[self.pair], 'server')
                 s_result = self.s_result.get(timeout=240)
                 c_result = self.c_result.get(timeout=240)
             self.output_q.put('wait all cmd run complete')
             self.write_ori_to_docx(ori_docx, seq, s_cmd, c_cmd, s_result, c_result)
             extracted_result = self.extract_result(c_result)
             # One worksheet per repetition, selected by its (1-based) title.
             ws = extract_xlsx.get_sheet_by_name(u'第{}组数据'.format(str(seq+1)))
             for index, result_cell in enumerate(extracted_result):
                 self.output_q.put(extracted_result)
                 self.output_q.put(result_cell)
                 ws.cell(row=2, column=column+index, value=result_cell)
                 # Take the shared result structure, update this cell, put it back.
                 all_result = self.collect_all_result.get()
                 all_result[seq][self.num][column+index] = result_cell
                 self.collect_all_result.put(all_result)
             run_cmd_times += 1
             self.output_q.put(run_cmd_times)
             self.all_cmd_q.put('cmd_run')
             # Wait until all_cmd_q holds run_cmd_times * pair_num entries.
             # NOTE(review): looks like a barrier across all pairs -- confirm.
             while self.all_cmd_q.qsize() != run_cmd_times * self.pair_num:
                 time.sleep(0.3)
             time.sleep(3)
         # Uses the last repetition's result length; extracted_result is
         # unbound here if CONF.test_times is 0.
         column += len(extracted_result)
     ori_docx.save(ori_path)
     extract_xlsx.save(extract_path)
     self.output_q.put('save_xlsx')
     # 'over' is sent to both workers before joining them.
     self.s_cmd_q.put('over')
     self.c_cmd_q.put('over')
     s.join()
     c.join()
     self.output_q.put('over')
예제 #18
0
def writeDoc(docTitle, feed, fileName):
    """Write ``feed`` into a Word document named after ``fileName``.

    Args:
        docTitle: currently unused.
            NOTE(review): the heading is taken from ``fileName`` -- confirm
            whether ``docTitle`` was meant to be used instead.
        feed: document body; UTF-8 encoded bytes are decoded, text is used
            as is.
        fileName: base name of the output file (without extension); also used
            as the document heading.

    If saving under ``fileName`` fails with IOError, the document is saved as
    ``Bad_Name.docx`` in the same directory instead.
    """
    # Same directory as before, with the backslashes escaped explicitly
    # instead of relying on invalid escape sequences such as "\d" or "\W".
    output_dir = 'I:\\dev_JC\\_Python\\Data_Pull\\WD\\'

    # Setting up word doc for file writing
    document = Document()
    document.add_heading(fileName, 0)

    # Writing it out
    if isinstance(feed, (bytes, bytearray)):
        feed = feed.decode('utf-8')
    document.add_paragraph(feed)
    try:
        document.save(output_dir + fileName + '.docx')
    except IOError:
        document.save(output_dir + "Bad_Name" + '.docx')
예제 #19
0
def substituteVariablesDocx(config, file_name_in, fileNameOut, subs):
    """Render the paragraphs of a .docx as a template and save the result.

    The text of every run is concatenated into one string, with "+<n>+run+"
    and "<n>+para+" markers recording which run/paragraph each piece came
    from.  That string is rendered with the template engine using ``subs`` as
    context, split on the markers again, and the paragraphs of the input
    document are rebuilt from the rendered pieces with the original styles.

    Args:
        config: passed to ``get_engine`` to obtain the template engine.
        file_name_in: path of the input document.
        fileNameOut: path the resulting document is saved to.
        subs: values for the template context.

    Returns:
        ``{"file": fileNameOut}``.
    """
    c = Context(subs)
    # Normalise the path: drop "/./" segments and use forward slashes.
    doc_in = Document(docx=file_name_in.replace("/./", "/").replace("\\.\\", "\\").replace("\\", "/"))
    # Scratch copy holding the original paragraph/run styles.
    doc_temp = Document()
    paras=doc_in.paragraphs
    fullText="" 
    i = 0
    styles = {}
    for para in paras:
        paraText=""
        p = doc_temp.add_paragraph(style = para.style)
        docx_copy_para_format_from(p, para)
        j = 0
        runs = para.runs
        for run in runs:
            txt = run.text
            # Each run contributes "<text>+<run index>+run+".
            paraText+= txt+"+"+str(j)+"+run+"
            r = p.add_run(text = txt, style=run.style)
            docx_copy_run_style_from(r, run)
            j+=1
        # Each paragraph is closed with "<paragraph index>+para+".
        fullText+= paraText+str(i)+"+para+"
        i+=1
    fullText = preprocess(fullText)
    t = get_engine(config).from_string(fullText)
    xtxt = t.render(c)
    xtxt = apply_sequence(xtxt)
    xParaTxts = xtxt.split("+para+")
    # Remove the original paragraphs; they are rebuilt below.
    for p in paras:
        removePara(p)

    # NOTE(review): if `paragraphs` returns a fresh list on each access, this
    # clear() does not change the document -- confirm whether it is needed.
    doc_in.paragraphs.clear()
    paras=doc_temp.paragraphs
    for xParaTxt in xParaTxts:
        runTxts = xParaTxt.split("+run+")
        # The last piece is the paragraph index; it is empty for the text
        # after the final "+para+" marker.
        if runTxts[-1]!='':
            para_n = int(runTxts[-1])
            p = doc_in.add_paragraph(style=paras[para_n].style)
            docx_copy_para_format_from(p, paras[para_n])
            for runTxt in runTxts[:-1]:
                try:
                    txt = runTxt.split("+")[-2]
                except:
                    # No "+" in the piece: there is no text part.
                    txt=""
                run_n = int(runTxt.split("+")[-1])
                r = p.add_run(text=txt, style=paras[para_n].runs[run_n].style)
                docx_copy_run_style_from(r, paras[para_n].runs[run_n])
            # Paragraphs that were control lines in the original are marked
            # with "{}" and removed in the loop below.
            if isControlLine(paras[para_n].text):
                p.text="{}"

    for p in doc_in.paragraphs:
        if p.text=="{}":
            removePara(p)
    doc_in.save(fileNameOut)
    return {"file":fileNameOut}
예제 #20
0
class HoroDocument():
    """Builds a Word document from sectioned content."""

    def generate(self, content):
        """Create ``self.doc`` from ``content``.

        ``content`` is an iterable of dicts with a 'section_title' and a
        'paragraph' list; each paragraph entry has a 'title' and a 'content'.
        Section titles become level-1 headings, paragraph titles level-3.
        """
        self.doc = Document()
        for section in content:
            self.doc.add_heading(section['section_title'], level=1)
            for entry in section['paragraph']:
                self.doc.add_heading(entry['title'], level=3)
                self.doc.add_paragraph(entry['content'])

    def save(self, filename):
        """Save the document created by ``generate`` to ``filename``."""
        self.doc.save(filename)
예제 #21
0
def make_entry(res):
    """Write the announcements of the requested date into the notebook .docx.

    The date is parsed from the 'date' group of ``res.match``.  If the
    database holds no announcement for that date the user is told so;
    otherwise a new page listing every announcing user and their
    announcements is added and the document is saved to the configured file.
    """
    # pull out the target notebook date from the regex
    date = dateutil.parser.parse(res.match.group('date'))
    logging.debug("new notebook target date: %s", date)

    # query the database for announcements from the specified date
    # TODO: build a new brain api and refactor?
    announcements = res.robot.brain.db.search((where('plugin') == 'notebook')
        & (where('type') == 'announcement')
        & (where('date') == date.isoformat())
        )

    logging.debug("announcements are %s", announcements)

    if announcements == []:
        # let the user know that that meeting date doesn't exist
        res.reply(res.msg.user, "No announcements for date {}".format(date.strftime('%m/%d/%Y')))
        return

    notebook_config = res.robot.config['plugin_config']['notebook']
    # append to the existing notebook file, or start from a blank document
    if notebook_config['append']:
        document = Document(notebook_config['file'])
    else:
        document = Document()

    # fill in a bunch of boilerplate
    document.add_page_break()
    document.add_heading('{date}, the BEC'.format(date=date.strftime('%m/%d/%Y')), level=1)
    document.add_heading('Announcements:', level=2)

    # every user that announced something on that date
    users = set(entry['user'] for entry in announcements)
    logging.debug("users are %s", users)

    for user in sorted(users):
        # get their real name
        real_name = res.robot.slack_client.server.users.find(user).real_name
        logging.debug("announcing user %s is %s", user, real_name)
        # one line naming the user, then one bullet per announcement
        document.add_paragraph("{}:".format(real_name))
        own_announcements = [entry for entry in announcements if entry['user'] == user]
        for entry in own_announcements:
            document.add_paragraph("{}".format(entry['announcement']),
                                   style='ListBullet')

    # TODO: onedrive
    document.save(notebook_config['file'])
예제 #22
0
def doit():
    """Document the schema of the configured MySQL database in a .docx file.

    For every table (sorted by name) the document gets a heading, the storage
    engine, the table comment and a four-column table listing each column's
    name, key, type and comment.  A progress bar is written to stdout.  The
    result is saved once, as ``<database>-<YYYYmmddHH>.docx``.
    """
    host = '192.168.1.254'
    user = '******'
    password = '******'
    database = 'renrenbx'

    mysql_init(mysql_host = host, mysql_database = database, mysql_user = user, mysql_password = password)

    # table name -> {'info': schema row, 'columns': [column rows]}
    tables = {}
    for column in table_schema(database):
        tables[column['TABLE_NAME']] = {'info': column, 'columns': []}
    for column in table_colume(database):
        tables[column['TABLE_NAME']]['columns'].append(column)

    document = Document()
    document.add_heading(database, 0)

    total = len(tables)
    for i, key in enumerate(sorted(tables.keys()), 1):
        percent = int(round((i * 1.0) / total * 100))
        sys.stdout.write(' [' + '#' * i + '] %s%%' % percent + '\r')
        sys.stdout.flush()

        info = tables[key]['info']
        document.add_heading(key, 1)
        document.add_paragraph().add_run(info['ENGINE']).bold = True
        table_comment = info['TABLE_COMMENT']
        document.add_paragraph().add_run(table_comment if table_comment else u'无注释').bold = True

        table = document.add_table(rows = 1, cols = 4)
        hdr_cells = table.rows[0].cells
        hdr_cells[0].text = u'字段'
        hdr_cells[1].text = u'主键'
        hdr_cells[2].text = u'类型'
        hdr_cells[3].text = u'注释'
        for column in tables[key]['columns']:
            row_cell = table.add_row().cells
            row_cell[0].text = column['COLUMN_NAME']
            row_cell[1].text = column['COLUMN_KEY'] if column['COLUMN_KEY'] else '-'
            row_cell[2].text = column['COLUMN_TYPE']
            row_cell[3].text = column['COLUMN_COMMENT'] if column['COLUMN_COMMENT'] else '-'

    # Save once after all tables are written; saving inside the loop rewrote
    # the whole file for every table and wrote nothing for an empty schema.
    document.save('%s-%s.docx' % (database, datetime.datetime.now().strftime("%Y%m%d%H")))
예제 #23
0
    def finish_report(self, arg, text, extra):
        """Write the selected report sections to ``test.docx``.

        ``self.lst`` holds indices into the list of section headings;
        duplicates are dropped and the sections are written in ascending index
        order.  The text of section ``n`` is read from
        ``self.storage_array[n + 1]``.  ``arg``, ``text`` and ``extra`` are
        not used.
        """
        easygui.msgbox("Reached", title="Warning")
        document = Document()
        headings = ['All Files', 'File System Info', 'Deleted Files', 'Deleted Folders', 'Partition Info']

        # Sort explicitly: the iteration order of a set is not guaranteed.
        for entry in sorted(set(self.lst)):
            print(entry)

            document.add_heading(headings[entry])
            document.add_paragraph(self.storage_array[entry + 1])
        document.save('test.docx')
예제 #24
0
def generatedocx(logs, quarter):
    """Generate the quarterly issues report as a .docx file.

    Args:
        logs: list of log records with ``date``, ``time`` and ``psa``
            attributes.  The list is sorted in place by its third field.
        quarter: quarter number used in the title and the file name.

    The report is written to ``reports/<current year>/Q<quarter>_Issues.docx``;
    the directory is created if needed.
    """
    # create the document
    report = Document()

    logs.sort(key=lambda tup: tup[2])

    today = date.today()

    # Generate the title. A report generated in January is labelled with the
    # previous year.
    title_year = today.year - 1 if today.month == 1 else today.year
    report.add_heading('KTEQ-FM Quarterly Issues Report Q' + str(quarter) + ' '+ str(title_year), level=0)

    report.add_paragraph('This document is the quarterly Community Issues Report for KTEQ-FM. It details a number of community issues discussed during programming throughout the quarter, and lists public service announcements that support these issues. This list contains all of the public service announcements played on air by live DJs. For a complete list, including automated public service announcements, contact KTEQ-FM management at [email protected]')

    table = report.add_table(rows=1, cols=3)
    hdr = table.rows[0].cells
    hdr[0].text = 'Date Played'
    hdr[1].text = 'Time Played'
    hdr[2].text = 'PSA Title'

    for entry in logs:
        row = table.add_row().cells
        row[0].text = str(entry.date)
        row[1].text = str(entry.time)
        row[2].text = entry.psa

    # NOTE(review): the directory uses the current year even when the title
    # uses the previous one (January) -- confirm this is intended.
    reportdir = 'reports/' + str(today.year)
    if not os.path.exists(reportdir):
        os.makedirs(reportdir)

    outputfile = reportdir + '/Q' + str(quarter) + '_Issues.docx'
    report.save(outputfile)
예제 #25
0
    def OnButton4Button(self, event):
        """Button handler: store a screenshot and write a timestamped test report.

        The instrument image is stored as ``temp.png`` in the current working
        directory and embedded in a new document together with the two text
        fields, the current time and the selected tester.  After the tenth
        run the window shows a message, closes and is destroyed.
        """
        path = os.getcwd()
        screenshot = path + '\\temp.png'
        vs.tr1300.SCPI.MMEMory.STORe.IMAGe = screenshot

        document = Document()
        document.add_heading('TEST REPORT',0)
        for caption, field in ((u'品名', self.textCtrl1), (u'型號', self.textCtrl2)):
            document.add_heading(caption, level=1)
            document.add_paragraph(field.Value, style='IntenseQuote')
        document.add_picture(screenshot, width=Inches(6))

        # year, month, day, hour, minute, second
        stamp = tuple(time.localtime()[:6])
        document.add_paragraph('%04d/%02d/%02d %02d:%02d:%02d' % stamp)
        report_path = path + '\\%04d%02d%02d_%02d%02d%02d.docx' % stamp

        tester = self.choice1.GetString(self.choice1.GetSelection())
        document.add_paragraph('Test by ' + tester)
        document.save(report_path)

        vs.tm += 1
        if vs.tm == 10:
            wx.MessageBox('The Demo program only can run 10 times.','Up to limit!', style = wx.CENTER | wx.ICON_WARNING | wx.OK)
            self.Close()
            self.Destroy()
        event.Skip()
예제 #26
0
def write_test_result_report_word(result_report):
    """Append one test result (heading, message, screenshot) to the Word report.

    The report file is opened if it already exists and created otherwise; it
    is saved back to the same path.
    """
    results_root = sys_tools.base_path + '\\auto_results\\'
    image_path = results_root + 'screenshots\\' + result_report.screen_shot
    word_result_path = results_root + 'test_results\\' + settings.test_result_file_name_word

    if os.path.exists(word_result_path):
        document = Document(word_result_path)
    else:
        document = Document()

    heading = (result_report.report_name + ': '
               + result_report.report_test_result.__str__() + '-'
               + result_report.browser.__str__())
    document.add_heading(heading, level=1)
    document.add_paragraph(text='Test Result: ' + result_report.msg)
    document.add_picture(image_path, width=Inches(4.5))
    document.save(word_result_path)
예제 #27
0
    def exportPayInfo(self):
        """Export the pending payment information and mail it as a .docx.

        Reservations that are still unconfirmed and whose confirmation
        deadline lies in the future are written to a document, one section per
        reservation including its WeChat-pay QR code.  The document is saved
        to a temporary file and sent to ``settings.MAIL_LIST``.
        """
        import os  # local import: only needed to delete the temporary QR images

        title = '%s[%s]' % (self.username, datetime.strftime(datetime.now(), "%Y-%m-%d"))
        reserveInfo = pitcher.getReserveInfo()
        c1 = reserveInfo[u'状态'] == u'未确认'
        c2 = reserveInfo[u'最后确认时间'].apply(parser.parse) > datetime.now()
        reserveInfo = reserveInfo[c1 & c2]
        document = Document()
        document.add_heading(title)
        for i, row in reserveInfo.iterrows():
            document.add_heading(u'票项%d' % (i + 1), level=1)
            document.add_paragraph(text=u'航线: ' + row[u'航线'])
            document.add_paragraph(text=u'航班时间: ' + row[u'航班时间'])
            document.add_paragraph(text=u'人数: ' + row[u'人数'])
            document.add_paragraph(text=u'金额: ' + row[u'金额'])
            document.add_paragraph(text=u'最后确认时间: ' + row[u'最后确认时间'])

            orderNumber = pitcher.getOrderNumber(row[u'预订ID'])
            qrcode = pitcher.getWeixinPayQrcode(orderNumber)
            # NamedTemporaryFile creates the file atomically; mktemp only
            # returned a name that another process could claim first.
            with tempfile.NamedTemporaryFile(suffix='.jpg', prefix='tmp_', delete=False) as f:
                f.write(qrcode)
                qr_path = f.name
            try:
                document.add_picture(qr_path, width=Inches(1))
            finally:
                # The image is no longer needed once it has been added.
                os.remove(qr_path)
            time.sleep(self.normalWaitingSecond)

        with tempfile.NamedTemporaryFile(suffix='.docx', prefix=title + '_', delete=False) as f:
            filename = f.name
        document.save(filename)

        # Send the mail
        send_mail(settings.MAIL_LIST, title, u'见附件', [filename])
예제 #28
0
    def _write_docx(self, path, content):

        """
        Create a .docx file holding a single paragraph.

        The document's ``created`` core property is set to the current time.

        Args:
            path (str): Where to save the file.
            content (str): Text of the paragraph.
        """

        document = Document()
        document.add_paragraph(content)
        properties = document.core_properties
        properties.created = datetime.now()
        document.save(path)
def transclusionDOCX(templatefilepath, resultDocumentDirectoryPath, dataEntities):
    """Generate one .docx per data entity from a line-based text template.

    Every template line is formatted with the entity's fields
    (``str.format(**entity)``) and added as a paragraph below a heading
    holding the entity's ``id``.  Files are named
    ``<uuid>_<running number>.docx`` inside ``resultDocumentDirectoryPath``.

    Args:
        templatefilepath: path of the text template.
        resultDocumentDirectoryPath: directory the documents are written to.
        dataEntities: iterable of dicts; each must contain an "id" key.

    Returns:
        False if the template file does not exist, True otherwise.
    """
    if not os.path.isfile(templatefilepath):
        return False

    # The template does not change between entities: read it once.
    with open(templatefilepath, "r") as template_file:
        template_lines = template_file.readlines()

    TransclusionId = uuid.uuid1()
    for TransclusionDocumentNumber, dataEntity in enumerate(dataEntities, 1):
        resultDocumentPath = "{0}\\{1}_{2}.docx".format(resultDocumentDirectoryPath, TransclusionId, TransclusionDocumentNumber)
        document = Document()
        document.add_heading(dataEntity["id"], 0)
        for line in template_lines:
            document.add_paragraph(line.format(**dataEntity))
        document.save(resultDocumentPath)
    return True
예제 #30
0
def create_word_document():
    """Write 'jobs_EZSearch.docx' with one page per job record.

    Records come from ``db.get_csv_json_text_data()`` and are read by
    position: 1 = title, 2 = employer, 4 = location, 5 = openings, 6 = level,
    7 = discipline, 0 = job id, 10 = comments, 11 = summary.
    """
    document = Document()

    # Base font for the whole document.
    normal = document.styles['Normal']
    normal.font.name = 'Calibri'
    normal.font.size = Pt(12)

    def add_line(label, value):
        """Append a 'label: value' paragraph in which only the value is bold."""
        line = document.add_paragraph()
        line.add_run(label + ": ")
        line.add_run(value).bold = True

    # (label, position in the job record), in display order.
    labelled_fields = (
        ('Location', 4),
        ('Level', 6),
        ('Number of Openings', 5),
        ('Discipline', 7),
        ('Job Id', 0),
    )
    # Markup fragments replaced in the summary text, applied in this order.
    summary_replacements = (
        ('<br />', '\n'),
        ('&#039;', "'"),
        ('&nbsp;', ' '),
    )

    for job in db.get_csv_json_text_data():
        document.add_paragraph(job[1], style='Title').alignment = 1
        document.add_heading(job[2], level=1).alignment = 1

        for label, position in labelled_fields:
            add_line(label, job[position])

        # The comments header appears only when the comment text is longer
        # than 10 characters.
        comments = job[10]
        if len(comments) > 10:
            document.add_heading('Comments', level=2)
            document.add_paragraph(comments)

        document.add_heading('Summary', level=2)
        print('The type of job[11] is: ' + str(type(job[11])))
        summary = job[11]
        for markup, plain in summary_replacements:
            summary = summary.replace(markup, plain)
        document.add_paragraph(summary)

        document.add_page_break()

    document.save('jobs_EZSearch.docx')
예제 #31
0
# Parse the landing page and locate the slides of the main swiper.
soup = BeautifulSoup(source, 'lxml')
mainSwiper = soup.find('div', class_='swiper-wrapper clearfix')
swiper = mainSwiper.find_all('div', class_='swiper-slide')

# Collect the article link and the thumbnail URL of every slide;
# i ends up holding the number of slides.
i = 0
List = []
Thumbnails = []
for news in swiper:
    List.append(news.a.get('href'))
    Thumbnails.append(news.img.get('src'))
    i = i + 1

# Fetch each linked article and copy its <p> texts into the document.
for links in List:
    counter = 0
    inner_source = requests.get(links).text
    inner_soup = BeautifulSoup(inner_source, 'lxml')
    article = inner_soup.find('div', class_='content-element')
    if article is not None:
        paragraphs = article.find_all('p')
        print(paragraphs)
        for p in paragraphs:
            if counter != 0:
                document.add_paragraph(p.text)
            else:
                # The first paragraph is styled as a quote. Pass its text:
                # add_paragraph expects a string, not the bs4 Tag itself.
                document.add_paragraph(p.text, style='IntenseQuote')

            counter = counter + 1

document.save(f'{name}.docx')
    txt = txt.replace('.' + clean_up[i], '@' + clean_up[i])
    txt = txt.replace(clean_up[i] + ']', clean_up[i] + 'l')
    txt = txt.replace('a]', 'al')
    txt = txt.replace('a[', 'al')
    txt = txt.replace(clean_up[i] + '[', clean_up[i] + 'l')

# A stand-alone bracket surrounded by spaces is replaced with a capital "I"
# (presumably an OCR misread of the pronoun -- confirm against sample scans).
txt = txt.replace(' ] ', ' I ')
txt = txt.replace(' [ ', ' I ')

# Blank-line-separated chunks are counted as paragraphs.
numParagraphs = txt.count('\n\n') + 1

txt_list = txt.split('\n\n')
#final_text.append(txt

# Dump the cleaned text into a one-paragraph document saved as OCR-raw.docx.
ocr_raw = Document()
ocr_raw.add_paragraph('')
ocr_raw.paragraphs[0].add_run(
    txt)  # Each txt is generated per a page from the PDF
ocr_raw.save('OCR-raw.docx')
# Ask which section label to use; anything other than 'p' selects chapters.
user_input = input(
    "Type 'p' for PARAGRAPHS or 'c' for CHAPTERS (include quotes): ")
# Re-open the file that was just written.
document = Document('OCR-raw.docx')
user_input = str(user_input)
if user_input == 'p':
    section_string = "PARAGRAPH "
else:
    section_string = "CHAPTER "

# Load the template document and record how many paragraphs it contains.
new_document = Document('PB-Template.docx')
#numParagraphs = len(document.paragraphs)
numTemplateParagraphs = len(new_document.paragraphs)
예제 #33
0
def export_doc_single(request, *args, **kwargs):
    """Export a single QAPP object as a Word (.docx) download.

    The QAPP is looked up with ``get_qapp_info(request.user, pk)``. The
    generated document contains, in order: cover page, approval signature
    grid, table of contents, revision history table, sections A-D and the
    references.

    Args:
        request: Incoming request; ``request.user`` is used for the lookup.
        *args: Unused.
        **kwargs: ``pk`` is read as the id of the QAPP to export.

    Returns:
        An ``HttpResponse`` carrying the .docx as an attachment, or an
        ``HttpResponseRedirect`` when no QAPP info is available.
    """
    qapp_id = kwargs.get('pk', None)
    qapp_info = get_qapp_info(request.user, qapp_id)

    if not qapp_info:
        # NOTE(review): the request object is passed where a redirect target
        # is expected -- confirm the intended URL.
        return HttpResponseRedirect(request)

    qapp = qapp_info['qapp']
    filename = '%s.docx' % slugify(qapp.title)

    document = Document()
    add_custom_headers(document)
    styles = document.styles

    def add_text_section(heading, text):
        """Add a level-2 heading followed by a 'No Spacing' paragraph."""
        document.add_heading(heading, level=2)
        document.add_paragraph(text, styles['No Spacing'])

    def fill_signature_row(row_cells, sig=None):
        """Lay out one 'Name / Signature/Date' row of the approval grid.

        When ``sig`` is None the name cell stays empty so that the row can
        be filled in by hand.
        """
        row_cells[0].text = 'Name:'
        if sig is not None:
            row_cells[1].text = sig.name
        row_cells[1].merge(row_cells[3])
        row_cells[4].text = 'Signature/Date:'
        row_cells[4].merge(row_cells[5])
        row_cells[6].merge(row_cells[11])

    # #################################################
    # BEGIN COVER PAGE
    # #################################################
    # Coversheet with signatures section:
    # TODO: Add top row logo and blue background label
    run = document.add_paragraph().add_run()

    try:
        if DEBUG:
            # NOTE(review): 'loogo.png' looks like a typo of 'logo.png' --
            # confirm the file name under STATIC_ROOT/EPA_Files.
            logo = path.join(STATIC_ROOT, 'EPA_Files', 'loogo.png')
            qual_assur_proj_plan = path.join(
                STATIC_ROOT, 'images', 'quality_assurance_project_plan.PNG')
        else:
            logo = static('logo.png')
            qual_assur_proj_plan = static('quality_assurance_project_plan.PNG')

        run.add_picture(logo, width=Inches(1.5))
        run.add_text('\t\t\t')
        run.add_picture(qual_assur_proj_plan, width=Inches(3))

    except FileNotFoundError:
        # The export continues without the cover images.
        print('couldn\'t find the static images!')

    # TODO Make blue_header text white, add blue background with shadow
    # background color: rgb(0, 176, 240)

    # The rest of the cover page is centered.
    add_center_heading(document, 'Office of Research and Development', 1)
    add_center_heading(document, qapp.division.name, 1)
    # Next few sections are from the qapp object
    add_center_heading(document, qapp.division_branch, level=3)
    add_center_heading(document, 'EPA Project Lead', level=2)
    for lead in qapp_info['qapp_leads']:
        add_center_heading(document, lead.name, level=3)
    add_center_heading(document, qapp.intra_extra, level=3)
    add_center_heading(document, qapp.qa_category, level=3)
    add_center_heading(document, qapp.revision_number, level=3)
    add_center_heading(document, str(qapp.date), level=3)
    add_center_heading(document, 'Prepared By', level=2)
    add_center_heading(
        document,
        '%s %s' % (qapp.prepared_by.first_name, qapp.prepared_by.last_name),
        level=3)
    add_center_heading(document, qapp.strap, level=3)
    add_center_heading(document, qapp.tracking_id, level=3)

    # #################################################
    # END COVER PAGE
    # BEGIN APPROVAL PAGE
    # #################################################

    add_custom_heading(document, 'A.1 Approval Page', level=2)
    # Signature grid: 3 header rows + one row per signature + 2 blank
    # hand-written rows + 1 contractor header row = 6 + num_signatures.
    signatures = qapp_info['signatures']
    num_signatures = len(signatures)
    table = document.add_table(rows=6 + num_signatures, cols=12)
    table.style = styles['Table Grid']

    set_table_row_height(table)

    approval = qapp_info['qapp_approval']

    row_cells = table.rows[0].cells
    row_cells[0].text = 'QA Project Plan Title:'
    row_cells[0].merge(row_cells[3])
    row_cells[4].text = approval.project_plan_title
    row_cells[4].merge(row_cells[11])

    row_cells = table.rows[1].cells
    row_cells[0].text = 'QA Activity Number:'
    row_cells[0].merge(row_cells[3])
    row_cells[4].text = approval.activity_number
    row_cells[4].merge(row_cells[11])

    # TODO: Center text in this row:
    row_cells = table.rows[2].cells
    row_cells[0].text = 'If Intramural or Extramural, EPA Project Approvals'
    row_cells[0].merge(row_cells[11])

    # iter_count is the number of signature rows written so far; it keeps
    # growing through the contractor block so later row offsets stay right.
    iter_count = 0
    for sig in signatures:
        if not sig.contractor:
            fill_signature_row(table.rows[3 + iter_count].cells, sig)
            iter_count += 1

    # Always insert a blank entry for hand-written approval sigs
    fill_signature_row(table.rows[3 + iter_count].cells)

    # TODO: Center text in this row:
    row_cells = table.rows[4 + iter_count].cells
    row_cells[0].text = 'If Extramural, Contractor Project Approvals'
    row_cells[0].merge(row_cells[11])

    for sig in signatures:
        if sig.contractor:
            fill_signature_row(table.rows[5 + iter_count].cells, sig)
            iter_count += 1

    # Always insert a blank entry for hand-written approval sigs
    fill_signature_row(table.rows[5 + iter_count].cells)
    document.add_page_break()

    # #################################################
    # END APPROVAL PAGE
    # BEGIN ToC PAGE
    # #################################################

    create_toc(document)
    document.add_page_break()

    # #################################################
    # END ToC PAGE
    # BEGIN Everything Else PAGE
    # #################################################

    #  1) Heading 1 - Revision History
    document.add_heading('Revision History', level=1)
    #  2) Table Label
    document.add_heading('Table 1 QAPP Revision History', level=3)
    #  3) Table (revision history): one header row plus one row per revision.
    #     The table must be sized by the number of revisions (it used to be
    #     sized by the number of signatures, which raised IndexError whenever
    #     there were more revisions than signatures).
    revisions = qapp_info['revisions']
    num_revisions = len(revisions)
    table = document.add_table(rows=1 + num_revisions, cols=3)
    table.style = styles['Light List']
    row_cells = table.rows[0].cells
    row_cells[0].text = 'Revision Number'
    row_cells[1].text = 'Date Approved'
    row_cells[2].text = 'Revision'

    for row_index, rev in enumerate(revisions, start=1):
        row_cells = table.rows[row_index].cells
        row_cells[0].text = rev.revision
        row_cells[1].text = str(rev.effective_date)
        row_cells[2].text = rev.description

    # TODO: Paragraphs aren't formatting properly, still double spaces...

    # Section A
    document.add_heading('Section A - Executive Summary', level=1)
    section_a = qapp_info['section_a']
    if section_a:
        section_a_parts = (
            ('A.3 Distribution List', 'a3'),
            ('A.4 Project Task Organization', 'a4'),
            ('A.5 Problem Definition Background', 'a5'),
            ('A.6 Project Description', 'a6'),
            ('A.7 Quality Objectives and Criteria', 'a7'),
            ('A.8 Special Training Certification', 'a8'),
            ('A.9 Documents and Records', 'a9'),
        )
        for heading, attr in section_a_parts:
            add_text_section(heading, getattr(section_a, attr))
    else:
        document.add_heading('SECTION A INCOMPLETE!', level=2)

    # Section B
    document.add_heading('Section B', level=1)
    if qapp_info['section_b']:
        # NOTE(review): the section B layout is chosen from section A's
        # sectionb_type; this raises if section A is missing -- confirm that
        # section B can never exist without section A.
        sectionb_type = qapp_info['section_a'].sectionb_type.name
        section_b_info = SECTION_B_INFO[sectionb_type]
        for key in section_b_info:
            val = getattr(qapp_info['section_b'], key, '')
            field_info = section_b_info[key]
            if field_info.get('heading', False):
                document.add_heading(field_info['heading'], level=2)
            document.add_heading(field_info['label'], level=3)
            document.add_paragraph(val, styles['No Spacing'])
    else:
        document.add_heading('SECTION B INCOMPLETE!', level=2)

    # Section C: when the section is missing only the bare headings are
    # written, without an "incomplete" marker.
    document.add_heading('Section C', level=1)
    section_c = qapp_info['section_c']
    if section_c:
        add_text_section('C.1 Assessments and Response Actions', section_c.c1)
        add_text_section('C.2 Reports to Management', section_c.c2)
    else:
        document.add_heading(
            'C.1 Assessments and Response Actions', level=2)
        document.add_heading('C.2 Reports to Management', level=2)

    # Section D: the level-1 heading is written only when the section exists.
    section_d = qapp_info['section_d']
    if section_d:
        document.add_heading('Section D', level=1)
        add_text_section(
            'D.1 Data Review, Verification, and Validation', section_d.d1)
        add_text_section(
            'D.2 Verification and Validation Methods', section_d.d2)
        add_text_section(
            'D.3 Reconciliation with User Requirements', section_d.d3)
    else:
        document.add_heading('SECTION D INCOMPLETE!', level=2)

    # References
    document.add_heading('References', level=1)
    if qapp_info['references']:
        run = document.add_paragraph().add_run()
        run.add_text(qapp_info['references'].references)
    else:
        document.add_heading('REFERENCES SECTION INCOMPLETE!', level=2)

    content_type = 'application/vnd.openxmlformats-officedocument.' + \
            'wordprocessingml.document'
    # content_type must be passed by keyword: the first positional argument
    # of HttpResponse is the response body, so the MIME string used to be
    # written in front of the document bytes.
    response = HttpResponse(content_type=content_type)
    response['Content-Disposition'] = 'attachment; filename=%s' % filename
    document.save(response)
    response['filename'] = filename
    return response
예제 #34
0
파일: lq3.py 프로젝트: GanYuLin/LQ-Spider
import requests
from bs4 import BeautifulSoup
import os
import docx
from docx import Document
from docx.shared import Inches

url = 'https://www.qiushibaike.com/article/119757360'
html = requests.get(url).content
soup = BeautifulSoup(html, 'html.parser')
# Article text.
wen = soup.find('div', {"class": "content"}).text
# Cut the image address out of the markup of the "thumb" <div>; 'https:' is
# prepended below to form the full URL.
img = str(soup.find('div', {"class": "thumb"})).split('src="')[1].split('"/')[0]
tu = 'https:' + img
img_name = img.split('/')[-1]
print(img)

# Download first, then save the image locally so it can be embedded; a
# failed download no longer leaves an empty file behind.
response = requests.get(tu).content
with open(img_name, 'wb') as f:
    f.write(response)

try:
    document = Document()
    document.add_paragraph(wen)  # add the text to the document
    document.add_picture(img_name)  # add the image to the document
    # NOTE(review): python-docx writes the .docx format; the '.doc' file name
    # is kept unchanged for existing consumers -- consider renaming.
    document.save('tuwen.doc')  # save the document
finally:
    # Remove the local image copy even when building the document fails.
    os.remove(img_name)
예제 #35
0
sec.top_margin = distance

sec.bottom_margin = distance

sec.page_width = Inches(12)  # set the page width

sec.page_height = Inches(20)  # set the page height

# Set the default font

chg_font(doc.styles['Normal'], fontname='宋体')

# Step 3: add paragraph text, tables, images, etc. to the document object and specify their styles
# 1. Add paragraph text
paragraph = doc.add_paragraph('text....')

ph_format = paragraph.paragraph_format

ph_format.space_before = Pt(10)  # spacing before the paragraph

ph_format.space_after = Pt(12)  # spacing after the paragraph

ph_format.line_spacing = Pt(19)  # line spacing
# 2. Add a table and fill in its content

tab = doc.add_table(rows=4, cols=4)  # add an empty table with 4 rows and 4 columns

cell = tab.cell(1, 3)  # get a cell object (indices start at 0)
cell.text = 'abc'
예제 #36
0
def _fix_sentence_spacing(text):
    """Return *text* with the spacing around sentence-ending periods repaired.

    Two clean-ups are applied:
    * a space is inserted after every ")." that is directly followed by a
      non-space character;
    * every " ." is turned into ". ", first occurrence first, until none is
      left.
    """
    import re

    text = re.sub(r'\)\.(?=[^ ])', '). ', text)
    while ' .' in text:
        text = text.replace(' .', '. ', 1)
    return text


def article_summarizer_BERT(path, name, ratio):
    """Summarize every article found at *path* and write '<name>.docx'.

    For each article the document gets a "Résumé" heading with the cleaned-up
    summary, followed by the article title and the full article text in which
    every sentence that also occurs in the summary is highlighted in yellow.

    Args:
        path: Source passed to ``articles`` (texts) and ``xls_to_csv``
            (table providing the 'Title' column).
        name: Document title; also the output file name without extension.
        ratio: Summary ratio forwarded to the summarizer model.
    """
    articles_list = articles(path)
    # Sentence-split version of each article, used for the highlighting pass.
    articles_lists = [sent_tokenize(art) for art in articles_list]

    model = Summarizer()
    summaries = [model(x, ratio=ratio, min_length=60) for x in articles_list]

    df = xls_to_csv(path)
    titles_list = [df['Title'][i] for i in range(len(df))]

    document = Document()
    document.add_heading(name, 0)

    for i, summary in enumerate(summaries):
        document.add_heading('Résumé : ' + titles_list[i], level=1)
        resume = document.add_paragraph(summary)
        resume.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY

        # Repair the spacing in one pass over the whole text. The earlier
        # hand-rolled loop reused an index relative to the remainder of the
        # string and therefore only ever fixed the first ")." occurrence.
        cleaned = _fix_sentence_spacing(resume.text)
        if cleaned != resume.text:
            resume.text = cleaned

        document.add_heading(titles_list[i], level=1)
        para = document.add_paragraph()
        para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
        for sentence in articles_lists[i]:
            run = para.add_run(sentence)
            # Highlight the sentences the summarizer kept (matched against
            # the raw summary, not the cleaned paragraph text).
            if sentence in summary:
                run.font.highlight_color = WD_COLOR_INDEX.YELLOW
            para.add_run(' ')

    file_name = name + '.docx'
    document.save(file_name)
예제 #37
0
        count_pages_int = 0
        if type_table[0] in publication_type and type_table[0] == 'статья':
            count_pages_int = (int(count_pages[3]) * 10 + int(count_pages[4]) - int(count_pages[0]) * 10 - int(
                count_pages[1])) / 16
            type_table[2] += count_pages_int
            type_table[1] += 1
            journals_publish_name.append(info["WorkName"])
            author_id_number += 1
            for author in info["Authors"]:
                journals_authors_names.append([author["Name"], author_id_number])
            journals_output_data.append(info["PublicationMeta"]["ЖУРНАЛ"])
            journals_count_pages.append(count_pages_int)


# Start the report: two short header paragraphs, the title heading and the
# summary table of publication counts.
document = Document()
paragraph_1 = document.add_paragraph()
paragraph_1.add_run('Приложение 2')
# alignment value 2 is presumably WD_ALIGN_PARAGRAPH.RIGHT -- confirm.
paragraph_1.alignment = 2
paragraph_2 = document.add_paragraph()
paragraph_2.add_run('к Распоряжению № _____ от ______ г.')
paragraph_2.alignment = 2
# Level-1 heading; max_year is inserted into the title text.
document.add_heading('РЕЗУЛЬТАТИВНОСТЬ НАУЧНО-ИССЛЕДОВАТЕЛЬСКОЙ ДЕЯТЕЛЬНОСТИ КАФЕДРЫ «Информационная безопасность» В' +
                     ' ' + str(max_year) + ' ' + 'ГОДУ', 1)
# One header row plus one row for each of the first six sort_tables entries.
all_publish = document.add_table(rows=len(sort_tables[:6]) + 1, cols=2)
all_publish.style = 'Table Grid'
# Bold column titles in the header row.
heading_all_publish = all_publish.rows[0].cells
heading_all_publish[0].paragraphs[0].add_run('Показатель').bold = True
heading_all_publish[1].paragraphs[0].add_run('Количество').bold = True
# Index of the next table row to fill (row 0 is the header).
iterator = 1
for info in sort_tables[:6]:
    all_publish_row_data = all_publish.rows[iterator].cells
class Print_document():
    """Builds a vulnerability report as a Word document with python-docx.

    Call ``start_doc`` (new document) or ``reinitialize_doc`` (reopen
    'Temp.docx') before any other method; both set ``self.document``, which
    every other method writes to.
    """

    def start_doc(self):
        """Start a new, empty document."""
        self.document = Document()

    def reinitialize_doc(self):
        """Reopen the work-in-progress report written by ``Savereport``."""
        self.document = Document('Temp.docx')

    def initialize_doc(self):
        """Apply page margins, base fonts, the header logo and the footer."""
        for section in self.document.sections:
            section.top_margin = Cm(2.54)
            section.bottom_margin = Cm(2.54)
            section.left_margin = Cm(2.54)
            section.right_margin = Cm(2.54)

        styles = self.document.styles

        body_font = styles['Normal'].font
        body_font.name = 'Times New Roman'
        body_font.size = Pt(14)

        # Same family as the body text; the former 'TimesNewRoman' spelling
        # does not match the family name used for the Normal style.
        heading_font = styles['Heading 2'].font
        heading_font.name = 'Times New Roman'
        heading_font.size = Pt(16)

        header = self.document.sections[0].header
        logo_paragraph = header.add_paragraph()
        logo_paragraph.add_run().add_picture('Pristine.png', width=Inches(2))
        # Alignment is a paragraph property; setting it on the run (as was
        # done before) has no effect.
        logo_paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT

        footer = self.document.sections[0].footer
        f = footer.add_paragraph(
            'All Rights Reserved by Pristine InfoSolutions Pvt. Ltd.')
        f.alignment = WD_ALIGN_PARAGRAPH.CENTER
        f.style = styles['Normal']
        # Bold and size are character formatting and must be set on the runs;
        # assigning them to the paragraph object is silently ignored.
        for run in f.runs:
            run.bold = True
            run.font.size = Pt(16)

    def _add_section(self, heading, text):
        """Add a level-2 heading followed by a 'Normal' paragraph."""
        self.document.add_heading(heading, 2)
        p = self.document.add_paragraph(text)
        p.style = self.document.styles['Normal']
        return p

    def setVname(self, Vname):
        """Add the 'Vulnerability Name:' section."""
        self._add_section('Vulnerability Name:', Vname)

    def setVSeverity(self, severity):
        """Add the 'Severity' section with an explicitly formatted heading."""
        heading = self.document.add_heading('Severity', 2)
        heading.style = self.document.styles['Heading 2']
        # Character formatting has to go on the heading's runs to take effect.
        for run in heading.runs:
            run.bold = True
            run.font.size = Pt(16)
            run.font.name = 'Times New Roman'
        p = self.document.add_paragraph(severity)
        p.style = self.document.styles['Normal']

    def SetVdesc(self, VDesc):
        """Add the 'Vulnerability Description:' section."""
        self._add_section('Vulnerability Description:', VDesc)

    def setVurl(self, Vurl):
        """Add the 'Vulnerable URL: ' section."""
        self._add_section('Vulnerable URL: ', Vurl)

    def setImg(self, Img):
        """Add the 'Proof of Concept: ' heading and its screenshots.

        Args:
            Img: Falsy for "no images"; otherwise a sequence whose first
                element is the collection of images to insert.
        """
        self.document.add_heading('Proof of Concept: ', 2)
        if Img:
            for picture in Img[0]:
                self.document.add_picture(picture, width=Cm(15.95))

    def setImpact(self, VImpact):
        """Add the 'Impact: ' section."""
        self._add_section('Impact: ', VImpact)

    def setVremed(self, Vrem):
        """Add the 'Remediation' section."""
        self._add_section('Remediation', Vrem)

    def setConclusion(self, Conclusion):
        """Add the 'Conclusion' section."""
        self._add_section('Conclusion', Conclusion)

    def pageBreak(self):
        """Insert a page break."""
        self.document.add_page_break()

    def Savedoc(self, name):
        """Save the report as '<name[0]>.docx'.

        Args:
            name: Sequence whose first element is the file name without
                extension.
        """
        self.document.save(name[0] + '.docx')

    def Savereport(self):
        """Save the work-in-progress report as 'Temp.docx'."""
        self.document.save('Temp.docx')
예제 #39
0
    def train_GradientBoosting(self, X, y, title):
        """Grid-search a GradientBoostingRegressor and write a Word report.

        The data is split 70/30 (``random_state=0``). For every scoring
        metric a 5-fold ``GridSearchCV`` is fitted on the training part, the
        fitted search object is pickled, the test-set predictions are saved
        to Excel and a .docx report (best parameters, grid scores, test MSE)
        is written under ``<self.datasavedir>/doc/Model``.

        Args:
            X: Feature matrix.
            y: Target values.
            title: Label used in the report heading and in the file names.
        """
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.3,
                                                            random_state=0)

        # Set the parameters by cross-validation
        # NOTE(review): recent scikit-learn releases no longer accept
        # max_features='auto' for gradient boosting -- confirm against the
        # pinned version.
        tuned_parameters = [{
            'learning_rate': [0.01, 0.05, 0.1],
            'n_estimators': [100, 200, 300],
            'max_features': ['auto', 'sqrt', 0.2, 0.4, 0.6]
        }]

        scores = ['neg_mean_squared_error']

        for score in scores:
            report_name = '{}_GradientBoosting_{}'.format(title, score)
            document = Document()
            document.add_heading(report_name, 0)

            document.add_paragraph("# Tuning hyper-parameters for %s" % score)

            clf = GridSearchCV(GradientBoostingRegressor(),
                               tuned_parameters,
                               cv=5,
                               scoring=score)
            clf.fit(X_train, y_train)

            document.add_paragraph(
                "Best parameters set found on development set:")
            # add_paragraph expects text; passing the dict itself would not
            # render its key/value pairs.
            document.add_paragraph(str(clf.best_params_))

            document.add_paragraph("Grid scores on development set:")
            results = clf.cv_results_
            for mean, std, params in zip(results['mean_test_score'],
                                         results['std_test_score'],
                                         results['params']):
                document.add_paragraph("%0.3f (+/-%0.03f) for %r" %
                                       (mean, std * 2, params))

            document.add_paragraph()
            document.add_paragraph(
                "The scores are computed on the full evaluation set")

            y_true = np.squeeze(y_test).tolist()
            y_pred = clf.predict(X_test).tolist()
            print(clf.best_estimator_.feature_importances_)
            self.saver.save_pickle(
                data=clf, filename='GradientBoosting_{}'.format(title))

            pre_test = pd.DataFrame({'y_true': y_true, 'y_pre': y_pred})
            self.saver.save_excel(pre_test,
                                  filename='predictions_gradientboosting',
                                  foldername='Model')

            # Last line of the report: mean squared error on the test split.
            document.add_paragraph(str(mean_squared_error(y_true, y_pred)))
            fold_path = os.path.join(self.datasavedir, 'doc', 'Model')
            save_path = self.check_savepath(
                foldpath=fold_path,
                filename=report_name + '.docx')
            document.save(save_path)
예제 #40
0
from bs4 import BeautifulSoup
import requests
from docx import Document
import os

if not os.path.isdir('./scpArchive'):
    os.mkdir('./scpArchive')

# Archive SCP-002 .. SCP-099. Zero-padding the number to three digits covers
# both the single-digit and the two-digit entries in one loop.
for i in range(2, 100):
    scp_id = 'scp-%03d' % i
    scp = requests.get("http://www.scp-wiki.net/" + scp_id)
    soup = BeautifulSoup(scp.text, 'html.parser')
    main_text = soup.select('p')
    file = Document()
    # Only the <p> elements from index 8 up to (not including) the last one
    # are copied.
    for j in range(8, len(main_text) - 1):
        try:
            file.add_paragraph(main_text[j].getText())
        except ValueError:
            # Leave a placeholder for text that could not be written.
            file.add_paragraph('Line ' + str(j) + " had an error")
    file.save('./scpArchive/' + scp_id + ".docx")
예제 #41
0
class Docx:
    """Thin convenience wrapper around a python-docx ``Document``.

    The wrapped document is available as ``self.doc``; ``self.doc_name`` is
    the path it was opened from and the target of ``close``.
    """

    # Raw WordprocessingML fragments substituted for control characters
    # ('\n', '\a', '\t', '\f') by set_cell.
    NEWLINE_XML = '</w:t><w:br/><w:t xml:space="preserve">'
    NEWPARAGRAPH_XML = '</w:t></w:r></w:p><w:p><w:r><w:t xml:space="preserve">'
    TAB_XML = '</w:t></w:r><w:r><w:tab/></w:r><w:r><w:t xml:space="preserve">'
    PAGE_BREAK = '</w:t><w:br w:type="page"/><w:t xml:space="preserve">'

    def __init__(self, doc_name):
        """Open ``doc_name`` when it is truthy, otherwise start a blank document."""
        if doc_name:
            self.doc = Document(doc_name)
        else:
            self.doc = Document()
        self.doc_name = doc_name

    def add_heading(self, title_level, title):
        '''
        Add a heading.
        title_level  heading level (1, 2, 3, ...)
        title        heading text
        '''
        self.doc.add_heading(title, level=title_level)

    def add_paragraph(self, content):
        """Append a paragraph holding ``content`` and return it."""
        return self.doc.add_paragraph(content)

    def set_paragraph_property(self, paragraph, blod, font_size, font_color, bk_color):
        """Placeholder; not implemented."""
        pass

    def add_table(self, row=1, col=1, style=None):
        '''
        Add a table with ``row`` rows and ``col`` columns and return it.
        style  table style name; 'Table Grid' is used when omitted
        Autofit is switched off on the new table.
        '''
        table = self.doc.add_table(
            rows=row, cols=col,
            style='Table Grid' if style is None else style)
        table.autofit = False
        return table

    def get_cell(self, table, row, col):
        '''
        Return the cell at the given row and column of a table.
        table  table object, e.g. the value returned by add_table
        row    zero-based row index
        col    zero-based column index
        '''
        return table.rows[row].cells[col]

    def merge_cell(self, cells):
        '''
        Merge the given cells into the first one and return the merged cell.
        The merged cell's text is reset to its text with line breaks removed.
        With fewer than two cells the first cell is returned untouched.
        '''
        merged_cell = cells[0]
        if len(cells) < 2:
            return merged_cell

        text = ''
        for other in cells[1:]:
            merged_cell = merged_cell.merge(other)
            if merged_cell.paragraphs:
                text = merged_cell.text.replace('\n', '')
        merged_cell.text = text
        return merged_cell

    def set_cell_background(self, cell, bk_color=None):
        '''
        Set the background colour of a cell.
        cell      cell object, e.g. from get_cell
        bk_color  RGB hex string such as "AABBCC"; nothing is changed when
                  it is None
        '''
        if bk_color is None:
            # Without a colour there is nothing to shade; writing the literal
            # "None" as the fill value would produce an invalid attribute.
            return
        xml = r'<w:shd {0} w:fill="{1}"/>'.format(nsdecls('w'), bk_color)
        shading_elm_1 = parse_xml(xml)
        cell._tc.get_or_add_tcPr().append(shading_elm_1)

    def set_cell_text(self, cell, text):
        '''
        Set the text of a cell.
        cell  cell object, e.g. from get_cell
        text  new text; it replaces the text of the cell's last paragraph
        '''
        if cell.paragraphs:
            paragraph = cell.paragraphs[-1]  # use the last paragraph
            paragraph.text = text
        else:
            paragraph = cell.add_paragraph(text)
        # Indentation lives on paragraph_format; assigning first_line_indent
        # directly on the paragraph object has no effect.
        paragraph.paragraph_format.first_line_indent = Inches(0)

    def set_cell(self, cell, text,
                        style=None,
                        color=None,
                        highlight=None,
                        size=None,
                        subscript=None,
                        superscript=None,
                        bold=False,
                        italic=False,
                        underline=False,
                        strike=False,
                        font=None,
                        url_id=None):
        '''
        Append a formatted run (optionally wrapped in a hyperlink) to the
        last paragraph of a cell.
        text       run text; non-strings are converted, bytes are decoded as
                   UTF-8; '\n', '\t' and '\f' become break, tab and page break
        style      character style id
        color      text colour as hex, a leading '#' is dropped
        highlight  highlight value, a leading '#' is dropped
        size       value written to w:sz and w:szCs
        underline  'single' or 'double'; any other truthy value means 'single'
        url_id     relationship id; when given the run is wrapped in a hyperlink
        '''
        # If not a string : cast to string (ex: int, dict etc...)
        if not isinstance(text, (six.text_type, six.binary_type)):
            text = six.text_type(text)
        if not isinstance(text, six.text_type):
            text = text.decode('utf-8', errors='ignore')
        # NOTE(review): a '\a' in text closes the enclosing paragraph inside
        # this stand-alone run fragment, which cannot be parsed -- confirm
        # whether paragraph breaks are needed here.
        text = (escape(text)
                .replace('\n', self.NEWLINE_XML)
                .replace('\a', self.NEWPARAGRAPH_XML)
                .replace('\t', self.TAB_XML)
                .replace('\f', self.PAGE_BREAK))

        prop = u''

        if style:
            prop += u'<w:rStyle w:val="%s"/>' % style
        if color:
            if color[0] == '#':
                color = color[1:]
            prop += u'<w:color w:val="%s"/>' % color
        if highlight:
            if highlight[0] == '#':
                highlight = highlight[1:]
            prop += u'<w:highlight w:val="%s"/>' % highlight
        if size:
            prop += u'<w:sz w:val="%s"/>' % size
            prop += u'<w:szCs w:val="%s"/>' % size
        if subscript:
            prop += u'<w:vertAlign w:val="subscript"/>'
        if superscript:
            prop += u'<w:vertAlign w:val="superscript"/>'
        if bold:
            prop += u'<w:b/>'
        if italic:
            prop += u'<w:i/>'
        if underline:
            if underline not in ['single', 'double']:
                underline = 'single'
            prop += u'<w:u w:val="%s"/>' % underline
        if strike:
            prop += u'<w:strike/>'
        if font:
            prop += (u'<w:rFonts w:ascii="{font}" w:hAnsi="{font}" w:cs="{font}"/>'
                     .format(font=font))

        # The fragment is parsed on its own, so its root element has to
        # declare the w: and r: namespaces itself.
        namespaces = nsdecls('w', 'r')
        xml = u'<w:r %s>' % namespaces
        if prop:
            xml += u'<w:rPr>%s</w:rPr>' % prop
        xml += u'<w:t xml:space="preserve">%s</w:t></w:r>' % text
        if url_id:
            xml = (u'<w:hyperlink %s r:id="%s" w:tgtFrame="_blank">%s</w:hyperlink>'
                   % (namespaces, url_id, xml))
        run_element = parse_xml(xml)
        # A run belongs inside a paragraph, not inside the cell properties.
        cell.paragraphs[-1]._p.append(run_element)

    def close(self):
        """Save the document back to the path it was opened from."""
        self.doc.save(self.doc_name)

    def save_as(self, doc_name):
        """Save the document to ``doc_name``."""
        self.doc.save(doc_name)
예제 #42
0
def exam_export(exam):
    """Write a Word summary of ``exam`` into the exams report folder.

    The document gets a title built from the exam name, an
    'Overall information' heading and one paragraph per exam attribute.
    It is saved as ``<exam_name>.(<id>).docx`` under the relative
    ``..\\..\\Reports\\Exams\\`` directory.
    """
    report = Document()

    report.add_heading(exam.exam_name + "(Exam)", 0)
    report.add_heading('Overall information', level=1)

    # One paragraph per line, in display order.
    summary_lines = (
        'ID: ' + str(exam.id),
        'Name: ' + exam.exam_name,
        'Pass time: ' + exam.pass_time,
        'Status: ' + exam.status,
        'Score: ' + str(exam.score),
        'Enrollee: ' + str(exam.get_enrollee()),
        'Examiner: ' + str(exam.get_examiner()),
    )
    for line in summary_lines:
        report.add_paragraph(line)

    target = ('..\\..\\Reports\\Exams\\'
              + exam.exam_name + '.(' + str(exam.id) + ').docx')
    report.save(target)
예제 #43
0
def image_extract():
    """OCR every image in ``resumes/`` and save the text as a Word document.

    For each ``.jpg``/``.jpeg``/``.png`` file in the directory:

    1. the image is thresholded and its external contours are located;
    2. the first roughly square contour centred in the middle of the image
       is used as a mask (falling back to the last contour found when none
       qualifies);
    3. the masked image is written to a temporary ``-Scanned.png`` file,
       run through Tesseract and the temporary file is removed;
    4. the recognised text (non-ASCII characters and form feeds replaced
       by spaces) is saved to ``resumes/<image file name>.docx``.

    Images that cannot be read or that contain no contour are skipped.

    Returns:
        The path of the last ``.docx`` written, or ``None`` when no image
        was converted.
    """
    directory = 'resumes/'
    image_suffixes = (".jpg", ".jpeg", ".png")
    word_file_name = None

    for file in os.listdir(directory):
        if not file.endswith(image_suffixes):
            continue
        full_path = os.path.join(directory, file)

        # cv2.imread signals an unreadable file by returning None.
        img = cv2.imread(full_path)
        if img is None:
            continue

        # Threshold the grayscale image and find the external contours.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        thresh = cv2.threshold(gray, 225, 255, cv2.THRESH_BINARY_INV)[1]
        # findContours returns (contours, hierarchy) on OpenCV 2.4/4.x and
        # (image, contours, hierarchy) on 3.x; [-2] is the contour list on all.
        cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
        if len(cnts) == 0:
            # Nothing to mask, hence nothing to OCR.
            continue

        # Draw the contours on the image that is masked below.
        cv2.drawContours(img, cnts, -1, (240, 0, 159), 3)

        # Pick the first large, roughly square contour centred in the image.
        H, W = img.shape[:2]
        chosen = cnts[-1]  # fallback when no contour qualifies
        for cnt in cnts:
            x, y, w, h = cv2.boundingRect(cnt)
            if (cv2.contourArea(cnt) > 100 and (0.7 < w / h < 1.3)
                    and (W / 4 < x + w // 2 < W * 3 / 4)
                    and (H / 4 < y + h // 2 < H * 3 / 4)):
                chosen = cnt
                break

        # Keep only the pixels inside the chosen contour.
        mask = np.zeros(img.shape[:2], np.uint8)
        cv2.drawContours(mask, [chosen], -1, 255, -1)
        dst = cv2.bitwise_and(img, img, mask=mask)

        # Tesseract is fed from a temporary file that is always cleaned up.
        stem = os.path.splitext(file)[0]
        scanned_file_name = os.path.join(directory, stem + "-Scanned.png")
        cv2.imwrite(scanned_file_name, dst)
        try:
            with Image.open(scanned_file_name) as scanned:
                file_text = pytesseract.image_to_string(scanned)
        finally:
            os.remove(scanned_file_name)

        # remove all non-XML-compatible characters
        cleaned = re.sub(r'[^\x00-\x7F]+|\x0c', ' ', file_text + "\n")

        document = Document()
        document.add_paragraph(cleaned)
        word_file_name = os.path.join(directory, file + ".docx")
        document.save(word_file_name)

    return word_file_name
예제 #44
0
class Visualizer:
    """
    Reads in the log files and extracts the necessary information needed
    to a docx (pdf later) file

    Arguments:
        - path_logs: the path to the logs
        - mturk_session: the mturk dev session

    Returns:
        - A word file which shows the necessary information from the logs
    """
    def __init__(self, path_logs, mturk_session):
        # Documents are written to <path_logs>/output/<mturk_session>.
        self.path_results = os.path.join(path_logs + "/output/", mturk_session)
        self.path_logs = os.path.join(path_logs, mturk_session)

    def run(self, global_rdr):
        """Read all logs of the session and write one document per log.

        Arguments:
            - global_rdr: object exposing ``ReadLogs``; when falsy the
              module-level ``rdr`` import is used instead.
        """
        # Differentiates between being called from here or from run.py.
        if not global_rdr:
            logs_reader = rdr.ReadLogs(self.path_logs)
        else:
            logs_reader = global_rdr.ReadLogs(self.path_logs)

        # A list of the speakers' user ids per meetup room and the
        # corresponding list of json logtexts.
        list_users, list_logtext = logs_reader.run()
        for _users, single_logtext in zip(list_users, list_logtext):
            self._write_to_docx(_users, single_logtext)
        print("DONE")

    def _write_to_docx(self,
                       _users,
                       single_logtext,
                       includeProcessed=False,
                       single_sample=None):
        """
        This function creates a document (word file) for each logfile and writes the
        necessary information into it (pdf later)

        Arguments:
            - _users: the user ids of the two speakers; the first entry is
              labelled 'First speaker', any other 'Second speaker'
            - single_logtext: list of log items (dicts) of one meetup
            - includeProcessed: also append the dialogues of single_sample
            - single_sample: mapping passed to _include_processed_dialogue

        Returns:
            - True once the document has been saved

        Raises:
            - ValueError: if the log has no "join" item of the Moderator,
              which is what provides the output file name
        """
        # Layout of each table type:
        # [heading, rows, cols, column title(s)...]
        dict_inputs = {
            'table_traits': ['Traits / Attitudes', 1, 1, 'Persona'],
            'table_facts': ['Facts / Knowledge', 1, 2, 'Entity', 'Content'],
            'table_questions': ['Questions', 1, 1, ''],
            'table_answers': ['Answers', 1, 2, 'Entity', 'Answer']
        }

        # First pass: story title and the entities handed to the users.
        story = None
        table_entity = []
        for item in single_logtext:
            if item['user']['name'] == "Moderator" and item[
                    'type'] == "story_type":
                story = item['story_type']

            if item['type'] == "table_entities":
                table_entity = item['table']

        # Create a new document for each meetup log
        self.document = Document()
        if story is None:
            self.document.add_heading("Story type not specified", 0)
        else:
            self.document.add_heading(story, 0)

        curr_userid = None
        dialogue_list = []
        fname = None

        for item in single_logtext:
            # Write the facts and traits for both users
            params = dict_inputs.get(item['type'])
            if params is not None:
                for _id in _users:
                    if item['user']['id'] != _id:
                        continue
                    if _id == _users[0]:
                        speaker = 'First speaker'
                    else:
                        speaker = 'Second speaker'
                    self._add_table_section(item, params, speaker)

            # Rearrange the dialogue so that it is one line per speaker turn
            if item['type'] == 'text' and item['user']['id'] in _users:
                if dialogue_list and item['user']['id'] == curr_userid:
                    # Same speaker again: merge into the previous turn
                    dialogue_list[-1] = (dialogue_list[-1] + " [EOU] "
                                         + item['msg'])
                else:
                    dialogue_list.append(item['msg'])
                curr_userid = item['user']['id']

            if item['type'] == "join" and item['user']['name'] == "Moderator":
                uid = item['timestamp-iso'] + "-" + item['room']['name']
                # files cannot be saved with the character ":"
                fname = uid.replace(":", "-") + ".docx"

        # Entities provided
        e = self.document.add_heading('Entities Provided To Users', level=1)
        e.add_run().bold = True

        for ent in table_entity:
            self.document.add_paragraph(ent, style='List Bullet')

        # Dialogue
        p = self.document.add_heading('Dialogue', level=1)
        p.add_run().bold = True
        self._add_dialogue(dialogue_list)

        if includeProcessed:
            self._include_processed_dialogue(single_sample)

        # Save
        if fname is None:
            raise ValueError(
                "log contains no Moderator 'join' item to name the file after")
        self._saver(self.document, fname)

        print("\nVisualization result has been saved into {}".format(
            self.path_results))
        return True

    def _add_table_section(self, item, params, speaker):
        """Write the heading(s) and content of one table item of a speaker."""
        if item['type'] == 'table_traits':
            # Only add this heading at the first table of each speaker
            p = self.document.add_heading(speaker, level=1)
            p.add_run().bold = True

        p = self.document.add_heading(params[0], level=2)
        p.add_run().italic = True

        if len(params) > 4:  # e.g for case: table_facts and table_answers
            table = self.document.add_table(rows=params[1], cols=params[2])
            table.style = 'Table Grid'
            hdr_cells = table.rows[0].cells
            hdr_cells[0].text = params[3]
            hdr_cells[1].text = params[4]

            # zip stops at the shorter of the two columns
            for _entity, _content in zip(item['table'][0], item['table'][1]):
                row_cells = table.add_row().cells
                row_cells[0].text = _entity
                row_cells[1].text = _content
        else:  # e.g for case: table_traits and table_questions
            if item['type'] == 'table_traits':
                list_style = 'List'
            else:
                list_style = 'List Bullet'
            for _content in item['table']:
                self.document.add_paragraph(_content, style=list_style)

    def _add_dialogue(self, dialogue_list):
        """Add one paragraph per turn; every second turn is right-aligned."""
        for turn, utterance in enumerate(dialogue_list):
            paragraph = self.document.add_paragraph(utterance)
            if turn % 2:  # odd turns belong to the second speaker
                paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT

    def _include_processed_dialogue(self, single_sample):
        """
        Create a document (word file) for the processed logfile

        single_sample_output.update({
            'uid': uid,
            'story': story,
            'entity_provided': table_entity,
            'knowledge': knowledge_dict,
            'attitudes': attitudes_dict,
            'questions': question_dict,
            'answers': answer_dict,
            'named_entities': 'empty',
            'dialogue_orig': self.dialogue,
            'dialogue_tokenized': dialogue_tokenized,
            'dialogue_processesd': 'empty',
            'dialogue_named_entity': 'empty'
            })

        Nothing is added when single_sample is None or empty.
        """
        if not single_sample:
            return

        # Keys of single_sample that are rendered, with their headings.
        # NOTE(review): the sample layout above uses 'dialogue_orig', which
        # would not match 'dialogue_original' - confirm against the producer.
        dict_inputs = {
            'dialogue_original': 'Original Dialogue',
            'dialogue_processesd': 'Processesd Dialogue',
            'dialogue_named_entity': 'Named Entity Dialogue'
        }
        for entity in single_sample:
            if entity not in dict_inputs:
                continue
            p = self.document.add_heading(dict_inputs[entity], level=1)
            p.add_run().bold = True

            if single_sample[entity] == 'empty':
                self.document.add_paragraph(
                    "{} not yet added".format(entity))
            else:
                self._add_dialogue(single_sample[entity])

    def _saver(self, _document, _fname):
        """Save ``_document`` as ``_fname`` inside the results directory."""
        if not os.path.exists(self.path_results):
            os.makedirs(self.path_results)

        doc_path = os.path.join(self.path_results, _fname)
        _document.save(doc_path)
예제 #45
0
 def get_info(self, id, path):
     """Scrape a user's share page and save a summary as ``info.docx``.

     The page at ``https://www.iesdouyin.com/share/user/<id>`` is fetched,
     the avatar is downloaded to a temporary ``temp.png`` and the profile
     fields are written to ``<path>/<nickname>/info.docx``. The temporary
     image is removed afterwards, also when building the document fails.

     Args:
         id: user id appended to the share URL.
         path: base directory; the nickname sub-directory is created via
             the class's ``__create_path`` helper.

     Raises:
         IndexError: if one of the expected elements is not found in the
             page source.
     """
     link = 'https://www.iesdouyin.com/share/user/' + str(id)
     response = requests.get(link, headers=self.__headers)
     page_source = response.text
     html = pq(page_source)

     def extract(pattern):
         # First regex hit in the raw page, passed through __trans.
         return self.__trans(re.compile(pattern).findall(page_source)[0])

     imgUrl = html(
         '#pagelet-user-info > div.personal-card > div.info1 > span.author > img'
     ).attr('src')
     headers = {
         'user-agent':
         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
     }
     tempRsp = requests.get(imgUrl, headers=headers)
     name = html(
         '#pagelet-user-info > div.personal-card > div.info1 > p.nickname'
     ).text()
     self.__create_path(path, name)

     user_dir = path + '/' + name
     temp_image = user_dir + '/temp.png'
     with open(temp_image, 'wb') as file:
         file.write(tempRsp.content)

     try:
         info = html(
             '#pagelet-user-info > div.personal-card > div.info2 > div > span'
         ).text()
         signature = html(
             '#pagelet-user-info > div.personal-card > div.info2 > p.signature'
         ).text()

         # Raw fragments first, in page order.
         id = extract('<p class="shortid">(.*?)</p>')
         focus = extract('<span class="num">(.*?)</span>')
         follow = extract(
             '<span class="follower block"><span class="num">(.*?)</span> </span>'
         )
         liked = extract(
             '<span class="liked-num block"><span class="num">(.*?)</span>'
         )
         production = extract(
             '<div class="user-tab active tab get-list" data-type="post">(.*?)</div>'
         )
         like = extract(
             '<div class="like-tab tab get-list" data-type="like">(.*?)</div>'
         )

         # Reduce each fragment to its text and strip the labels.
         id = pq(id).text().replace(' ', '').replace('抖音ID:', '')
         focus = pq(focus).text().replace(' ', '')
         follow = pq(follow).text().replace(' ', '').replace('粉丝', '')
         liked = pq(liked).text().replace(' ', '')
         production = pq(production).text().replace(' ', '').replace('作品', '')
         like = pq(like).text().replace(' ', '').replace('喜欢', '')

         document = Document()
         document.add_heading(name, 0)
         document.add_picture(temp_image, width=Inches(1.25))
         self.__add_param(document, '抖音ID', id)
         document.add_paragraph(info)
         document.add_paragraph(signature)
         self.__add_param(document, '关注', focus)
         self.__add_param(document, '粉丝', follow)
         self.__add_param(document, '赞', liked)
         self.__add_param(document, '作品', production)
         self.__add_param(document, '喜欢', like)
         document.save(user_dir + '/info.docx')
     finally:
         # The avatar is only needed while the document is being built.
         os.remove(temp_image)
예제 #46
0
import csv

doc_path = "C:/Users/Andrew/Documents/Vocab/Adverb-Stage3-Vocab-excercise.docx"
my_doc = Document(doc_path)

# Read the word list once instead of re-opening and re-reading the whole
# file for every group.
with open('C:/Users/Andrew/Documents/Vocab/AdvYearmiddlelist.csv',
          'r') as file:
    all_lines = file.readlines()

# 25 groups of 10 consecutive lines each
for i in range(25):
    start = i * 10
    n = 0
    finish = start + 10
    print(start)
    print(finish)
    word_list = csv.reader(all_lines[start:finish])
    my_doc.add_paragraph('------ Exercise ------')
    word_group = []
    # print the questions
    for item in word_list:
        # a CSV row may be split into several fields; glue them back together
        url_word = ''.join(item)
        word_group.append(url_word)
        print(word_group)
        n = n + 1
            # url = "https://dictionary.cambridge.org/dictionary/english/" + url_word

            # url = "https://tangorin.com/sentences?search=" + url_word
            # hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0)'}
            # req = urllib.request.Request(url, headers=hdr)
            # page_html = urllib.request.urlopen(req).read()
            # page_soup = bs(page_html, "html.parser")
            # # my_divs = page_soup.find_all("div", {"class": 'examp dexamp'})
예제 #47
0
def generate():
    """Build the patient report and save it as ``病历报告.docx``.

    Patient data comes from the first row of ``information.csv`` and the
    image paths/values from ``image_information.csv``. The report holds the
    current timestamp, the patient's name, sex and age, one pair of
    pictures plus value per image row, and a Name/Volume/Mass table.
    """
    report = Document()
    report.add_heading('病人病历报告', 0)

    # Timestamp shown at the top of the report.
    timestamp = time.strftime('%Y-%m-%d %a %H:%M:%S', time.localtime())

    patient = pd.read_csv('information.csv')
    name = patient['Name'].values[0]
    volume = patient['Volume'].values[0]
    mass = patient['Mass'].values[0]
    sex = patient['Sex'].values[0]
    age = patient['Age'].values[0]

    images = pd.read_csv('image_information.csv')
    raw_paths = images['image_raw'].values
    masked_paths = images['image_masked'].values
    scores = images['value'].values

    report.add_paragraph(timestamp)

    report.add_heading('病人信息', level=1)
    patient_lines = (
        ('姓名:', name.capitalize()),
        ('性别:', sex.capitalize()),
        ('年龄:', str(age)),
    )
    for label, value in patient_lines:
        report.add_paragraph(label, style='List Bullet')
        report.add_paragraph('        ' + value)

    report.add_heading('肺结节情况', level=1)
    picture_run = report.add_paragraph().add_run()
    # All three columns come from the same frame, so they have equal length.
    for raw, masked, score in zip(raw_paths, masked_paths, scores):
        picture_run.add_picture(raw, width=Inches(1.25))
        picture_run.add_text('                                     ')
        picture_run.add_picture(masked, width=Inches(1.25))
        picture_run.add_text('                                     ')
        picture_run.add_text(str(score))

    table = report.add_table(rows=1, cols=3, style="Table Grid")
    for cell, title in zip(table.rows[0].cells, ('Name', 'Volume', 'Mass')):
        cell.text = title
    value_cells = table.add_row().cells
    for cell, content in zip(value_cells, (name, str(volume), str(mass))):
        cell.text = content

    report.add_page_break()
    report.save('病历报告.docx')
예제 #48
0
class HtmlToDocx(HTMLParser):
    """Feed HTML through ``HTMLParser`` and mirror it into a python-docx target.

    The target (``self.doc``) is either a docx document or a table cell.
    Paragraph-level tags create paragraphs, text becomes runs, ``span``
    styles and font tags are applied to runs, and tables are filled by
    child parser instances, one per cell.
    """

    def __init__(self):
        super().__init__()
        self.options = {
            'fix-html': True,
            'images': True,
            'tables': True,
            'styles': True,
        }

    def set_initial_attrs(self, document=None):
        """Reset all parsing state; ``document`` defaults to a new Document."""
        self.tags = {
            'span': [],
            'list': [],
        }
        if document:
            self.doc = document
        else:
            self.doc = Document()
        self.bs = self.options[
            'fix-html']  # whether or not to clean with BeautifulSoup
        self.document = self.doc
        self.include_tables = True  #TODO add this option back in?
        self.include_images = self.options['images']
        self.include_styles = self.options['styles']
        self.paragraph = None
        # Defined up front so a tag that arrives before any text (e.g. a
        # leading <br>) or a table without parsed soup finds valid state.
        self.run = None
        self.tables = []
        self.table_no = 0
        self.skip = False
        self.skip_tag = None
        self.instances_to_skip = 0

    def get_cell_html(self, soup):
        """Return the markup of a cell without its own opening/closing tag."""
        # Returns string of td element with opening and closing <td> tags removed
        if soup.find_all():
            return '\n'.join(str(soup).split('\n')[1:-1])
        return str(soup)[4:-5]

    def add_styles_to_paragraph(self, style):
        """Apply ``text-align`` and pixel ``margin-left`` to the paragraph."""
        if 'text-align' in style:
            align = style['text-align']
            if align == 'center':
                self.paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
            elif align == 'right':
                self.paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.RIGHT
            elif align == 'justify':
                self.paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
        if 'margin-left' in style:
            # Only whole-pixel margins are understood; anything else
            # ("auto", "1.5em", ...) is ignored instead of raising.
            # TODO handle non px units
            px_margin = re.fullmatch(r'(\d+)px', style['margin-left'])
            if px_margin:
                margin = int(px_margin.group(1))
                self.paragraph.paragraph_format.left_indent = Inches(
                    min(margin // 10 * INDENT, MAX_INDENT))

    @staticmethod
    def _parse_color(value):
        """Return ``value`` as an (r, g, b) tuple of ints, or None.

        Understands ``rgb(r,g,b)`` and six-digit hex (with or without '#').
        """
        try:
            if 'rgb' in value:
                channels = tuple(
                    int(x) for x in re.sub(r'[a-z()]+', '', value).split(','))
            else:
                digits = value.lstrip('#')
                channels = tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))
        except ValueError:
            return None
        if len(channels) != 3 or not all(0 <= c <= 255 for c in channels):
            return None
        return channels

    def add_styles_to_run(self, style):
        """Apply ``color`` / ``background-color`` of ``style`` to the run.

        Values that cannot be parsed (named colours, short hex, rgba, ...)
        are ignored.
        """
        if 'color' in style:
            colors = self._parse_color(style['color'])
            if colors is not None:
                self.run.font.color.rgb = RGBColor(*colors)

        if 'background-color' in style:
            if self._parse_color(style['background-color']) is not None:
                self.run.font.highlight_color = WD_COLOR.GRAY_25  #TODO: map colors

    def parse_dict_string(self, string, separator=';'):
        """Turn ``"a:b;c:d"`` into ``{'a': 'b', 'c': 'd'}``.

        Spaces are removed first. Each entry is split on its first ':'
        only, so values that contain ':' themselves (URLs) are kept whole.
        Entries without ':' are dropped.
        """
        new_string = string.replace(" ", '').split(separator)
        string_dict = dict(x.split(':', 1) for x in new_string if ':' in x)
        return string_dict

    def handle_li(self):
        """Start a list paragraph styled and indented per the list stack."""
        # check list stack to determine style and depth
        list_depth = len(self.tags['list'])
        if list_depth:
            list_type = self.tags['list'][-1]
        else:
            list_type = 'ul'  # assign unordered if no tag

        if list_type == 'ol':
            list_style = "List Number"
        else:
            list_style = 'List Bullet'

        self.paragraph = self.doc.add_paragraph(style=list_style)
        self.paragraph.paragraph_format.left_indent = Inches(
            min(list_depth * LIST_INDENT, MAX_INDENT))
        self.paragraph.paragraph_format.line_spacing = 1

    def add_image_to_cell(self, cell, image):
        """Add ``image`` to ``cell`` inside a new paragraph."""
        # python-docx doesn't have method yet for adding images to table cells. For now we use this
        paragraph = cell.add_paragraph()
        run = paragraph.add_run()
        run.add_picture(image)

    def handle_img(self, current_attrs):
        """Insert the image named by ``src``, or a text placeholder.

        When images are disabled the parser is put into skip mode for the
        ``img`` tag instead.
        """
        if not self.include_images:
            self.skip = True
            self.skip_tag = 'img'
            return
        src = current_attrs['src']
        # fetch image
        src_is_url = is_url(src)
        if src_is_url:
            try:
                image = fetch_image(src)
            except urllib.error.URLError:
                image = None
        else:
            image = src
        # add image to doc
        if image:
            try:
                if isinstance(self.doc, docx.document.Document):
                    self.doc.add_picture(image)
                else:
                    self.add_image_to_cell(self.doc, image)
            except FileNotFoundError:
                image = None
        if not image:
            if src_is_url:
                self.doc.add_paragraph("<image: %s>" % src)
            else:
                # avoid exposing filepaths in document
                self.doc.add_paragraph("<image: %s>" %
                                       get_filename_from_url(src))
        # add styles?

    def handle_table(self):
        """
        To handle nested tables, we will parse tables manually as follows:
        Get table soup
        Create docx table
        Iterate over soup and fill docx table with new instances of this parser
        Tell HTMLParser to ignore any tags until the corresponding closing table tag

        Does nothing when no table soup is available for the current
        table; its contents are then parsed like ordinary markup.
        """
        if not self.include_tables or self.table_no >= len(self.tables):
            return
        table_soup = self.tables[self.table_no]
        rows, cols = self.get_table_dimensions(table_soup)
        self.table = self.doc.add_table(rows, cols)
        rows = table_soup.find_all('tr', recursive=False)
        for cell_row, row in enumerate(rows):
            cols = row.find_all(['th', 'td'], recursive=False)
            for cell_col, col in enumerate(cols):
                cell_html = self.get_cell_html(col)
                if col.name == 'th':
                    cell_html = "<b>%s</b>" % cell_html
                docx_cell = self.table.cell(cell_row, cell_col)
                child_parser = HtmlToDocx()
                child_parser.add_html_to_cell(cell_html, docx_cell)

        # skip all tags until corresponding closing tag
        self.instances_to_skip = len(table_soup.find_all('table'))
        self.skip_tag = 'table'
        self.skip = True
        self.table = None

    def handle_starttag(self, tag, attrs):
        """Open the docx structure matching ``tag`` and record its attrs."""
        if self.skip:
            return
        if tag == 'head':
            self.skip = True
            self.skip_tag = tag
            self.instances_to_skip = 0
            return
        elif tag == 'body':
            return

        current_attrs = dict(attrs)

        if tag == 'span':
            self.tags['span'].append(current_attrs)
            return
        elif tag == 'ol' or tag == 'ul':
            self.tags['list'].append(tag)
            return  # don't apply styles for now
        elif tag == 'br':
            if self.run is None:
                # A line break before any text: create the run to hold it.
                if not self.paragraph:
                    self.paragraph = self.doc.add_paragraph()
                self.run = self.paragraph.add_run()
            self.run.add_break()
            return

        self.tags[tag] = current_attrs
        if tag == 'p':
            self.paragraph = self.doc.add_paragraph()

        elif tag == 'li':
            self.handle_li()

        elif re.fullmatch(r'h[0-9]', tag):
            # Matched by pattern so that <hr> is not mistaken for a heading.
            if isinstance(self.doc, docx.document.Document):
                h_size = int(tag[1])
                self.paragraph = self.doc.add_heading(level=min(h_size, 9))
            else:
                self.paragraph = self.doc.add_paragraph()

        elif tag == 'img':
            self.handle_img(current_attrs)
            return

        elif tag == 'table':
            self.handle_table()
            return

        # set new run reference point in case of leading line breaks
        if tag == 'p' or tag == 'li':
            self.run = self.paragraph.add_run()

        # add style
        if not self.include_styles:
            return
        if 'style' in current_attrs and self.paragraph:
            style = self.parse_dict_string(current_attrs['style'])
            self.add_styles_to_paragraph(style)

    def handle_endtag(self, tag):
        """Close ``tag``: leave skip mode, pop tag state, emit link text."""
        if self.skip:
            if not tag == self.skip_tag:
                return

            if self.instances_to_skip > 0:
                self.instances_to_skip -= 1
                return

            self.skip = False
            self.skip_tag = None
            self.paragraph = None

        if tag == 'span':
            if self.tags['span']:
                self.tags['span'].pop()
                return
        elif tag == 'ol' or tag == 'ul':
            remove_last_occurence(self.tags['list'], tag)
            return
        elif tag == 'a':
            # Tolerate a stray </a> and anchors without href.
            link = self.tags.pop(tag, None)
            href = link.get('href') if link else None
            if href is not None:
                if not self.paragraph:
                    self.paragraph = self.doc.add_paragraph()
                self.paragraph.add_run('<link: %s>' % href)
            return
        elif tag == 'table':
            self.table_no += 1
            self.table = None
            self.doc = self.document
            self.paragraph = None

        if tag in self.tags:
            self.tags.pop(tag)
        # maybe set relevant reference to None?

    def handle_data(self, data):
        """Add ``data`` as a run and style it from open spans and font tags."""
        if self.skip:
            return

        if not self.paragraph:
            self.paragraph = self.doc.add_paragraph()

        self.run = self.paragraph.add_run(data)
        spans = self.tags['span']
        for span in spans:
            if 'style' in span:
                style = self.parse_dict_string(span['style'])
                self.add_styles_to_run(style)

        # add font style
        for tag in self.tags:
            if tag in fonts:
                font_style = fonts[tag]
                setattr(self.run.font, font_style, True)

    def ignore_nested_tables(self, tables_soup):
        """
        Returns array containing only the highest level tables
        Operates on the assumption that bs4 returns child elements immediately after
        the parent element in `find_all`. If this changes in the future, this method will need to be updated

        :return:
        """
        new_tables = []
        nest = 0
        for table in tables_soup:
            if nest:
                nest -= 1
                continue
            new_tables.append(table)
            nest = len(table.find_all('table'))
        return new_tables

    def get_table_dimensions(self, table_soup):
        """Return ``(row count, widest row's cell count)`` of direct rows.

        A table without direct ``tr`` children yields ``(0, 0)``.
        """
        rows = table_soup.find_all('tr', recursive=False)
        widths = [
            len(row.find_all(['th', 'td'], recursive=False)) for row in rows
        ]
        return len(rows), max(widths) if widths else 0

    def get_tables(self):
        """Collect the top-level tables of the soup; disable tables without it."""
        if not hasattr(self, 'soup'):
            self.include_tables = False
            return
            # find other way to do it, or require this dependency?
        self.tables = self.ignore_nested_tables(self.soup.find_all('table'))
        self.table_no = 0

    def run_process(self, html):
        """Optionally clean ``html`` with BeautifulSoup, then parse it."""
        if self.bs and BeautifulSoup:
            self.soup = BeautifulSoup(html, 'html.parser')
            html = remove_whitespace(str(self.soup))
        else:
            html = remove_whitespace(html)
        if self.include_tables:
            self.get_tables()
        self.feed(html)

    def add_html_to_document(self, html, document):
        """Append ``html`` to ``document`` (a docx document or table cell).

        Raises:
            ValueError: if ``html`` is not a str or ``document`` is neither
                a docx document nor a cell.
        """
        if not isinstance(html, str):
            raise ValueError('First argument needs to be a %s' % str)
        elif not isinstance(document,
                            docx.document.Document) and not isinstance(
                                document, docx.table._Cell):
            raise ValueError('Second argument needs to be a %s' %
                             docx.document.Document)
        self.set_initial_attrs(document)
        self.run_process(html)

    def add_html_to_cell(self, html, cell):
        """Replace the content of ``cell`` with the rendering of ``html``.

        Raises:
            ValueError: if ``cell`` is not a docx table cell.
        """
        if not isinstance(cell, docx.table._Cell):
            raise ValueError('Second argument needs to be a %s' %
                             docx.table._Cell)
        unwanted_paragraph = cell.paragraphs[0]
        delete_paragraph(unwanted_paragraph)
        self.set_initial_attrs(cell)
        self.run_process(html)
        # cells must end with a paragraph or will get message about corrupt file
        # https://stackoverflow.com/a/29287121
        if not self.doc.paragraphs:
            self.doc.add_paragraph('')

    def parse_html_file(self, filename_html, filename_docx=None):
        """Convert the HTML file to ``<filename_docx>.docx``.

        Without ``filename_docx`` the result is written next to the input
        as ``new_docx_file_<input name>.docx``.
        """
        with open(filename_html, 'r') as infile:
            html = infile.read()
        self.set_initial_attrs()
        self.run_process(html)
        if not filename_docx:
            path, filename = os.path.split(filename_html)
            # os.path.join keeps the output relative when the input has no
            # directory part (a plain '/' join would point at the root).
            filename_docx = os.path.join(path, 'new_docx_file_%s' % filename)
        self.doc.save('%s.docx' % filename_docx)
예제 #49
0
letter = 'M'
wordCount = 0
wordsPerFile = 20
fileCount = 1

docxName1 = 'test.docx'
docxName2 = letter+str(fileCount)+'.docx'

document1 = Document(docxName1)
document2 = Document(docxName2)


# Copy test.docx paragraph by paragraph into M1.docx, M2.docx, ...,
# moving on to the next target once wordsPerFile entries were copied.
for paragraph in document1.paragraphs:
    # A paragraph starting with four digits counts as the start of an entry.
    if re.match(r'\d{4}', paragraph.text) is not None:
        wordCount += 1
        if wordCount > wordsPerFile:
            # Current target is full: save it and continue in the next one.
            wordCount = 1
            document2.save(docxName2)
            print('words in {}: {}'.format(docxName2, wordsPerFile))
            fileCount += 1
            docxName2 = letter + str(fileCount) + '.docx'
            document2 = Document(docxName2)
    paraInsert = document2.add_paragraph()
    fromParatoPara(paraInsert, paragraph)

document2.save(docxName2)
print('words in {}: {}'.format(docxName2, wordCount))
print('finish, words count: {}'.format((fileCount-1) * wordsPerFile + wordCount))
예제 #50
0
def cargaexcel(request):
    """Turn an uploaded Excel workbook into a Word report download.

    On POST the file in ``request.FILES['document']`` is stored with
    ``FileSystemStorage``. Every sheet except the first is compacted by
    ``mat`` and written as one page: a title, four headed sections each
    followed by a small table, then a page break. The document is returned
    as an attachment named ``Proyecto.docx``.

    Any other request method renders ``word/index.html``.
    """
    import math

    if request.method != 'POST':
        return render(request, 'word/index.html')

    # The page layout below reads rows 0..14 and up to 4 values per row, so
    # the compacted grid is always padded to at least this size.
    min_filas = 15
    min_columnas = 5

    def mat(abcd):
        """Compact a sheet array into rows holding only its usable cells.

        Strings are kept; ints and finite floats are converted with
        ``str``; everything else (NaN, inf, None, other types) is dropped.
        Rows without any usable cell are skipped. The result is padded with
        ``None`` to at least 15 rows of at least 5 columns.
        """
        compactas = []
        for fila in abcd:
            valores = []
            for valor in fila:
                # Exact type checks on purpose: isinstance() would also
                # accept bool (an int subclass) and numpy.float64 (a float
                # subclass), which were never accepted here.
                if type(valor) is str:
                    valores.append(valor)
                elif type(valor) is int:
                    valores.append(str(valor))
                elif type(valor) is float and math.isfinite(valor):
                    # isfinite rejects NaN/inf like the former `x/x == 1`
                    # test did, without dividing by zero on 0.0.
                    valores.append(str(valor))
            if valores:
                valores.extend([None] * (min_columnas - len(valores)))
                compactas.append(valores)
        while len(compactas) < min_filas:
            compactas.append([None] * min_columnas)
        return compactas

    def agregar_seccion(titulo, columnas, filas):
        """Add a level-2 heading, an empty paragraph and a filled table."""
        dc1.add_heading(titulo, 2)
        dc1.add_paragraph()
        tabla = dc1.add_table(rows=0, cols=columnas)
        for valores in filas:
            celdas = tabla.add_row().cells
            # Rows may carry fewer values than the table has columns; the
            # remaining cells stay empty.
            for celda, texto in zip(celdas, valores):
                celda.text = texto

    uploaded_file = request.FILES['document']
    fs = FileSystemStorage()
    name = fs.save(uploaded_file.name, uploaded_file)
    dc1 = Document()

    # fs.path() is the real filesystem location of the stored upload;
    # fs.url() is a (URL-quoted) web address and only matched the path by
    # coincidence of the settings.
    with pd.ExcelFile(fs.path(name)) as xls:
        hojas = xls.sheet_names
        print(len(hojas))

        # The first sheet is skipped; every other sheet becomes one page.
        for hoja in hojas[1:]:
            fich = mat(xls.parse(hoja).__array__())
            print(fich)

            dc1.add_heading(fich[0][0], 0)
            agregar_seccion(fich[1][0], 2, [
                fich[2][:2],
                fich[3][:2],
                [fich[4][0], str(fich[4][1])],
            ])
            agregar_seccion(fich[5][0], 2, [
                fich[6][:2],
                fich[7][:2],
            ])
            agregar_seccion(fich[8][0], 4, [
                fich[9][:2],
                [fich[10][0]] + [str(nota) for nota in fich[10][1:4]],
                [fich[11][0], str(fich[11][1])],
            ])
            agregar_seccion(fich[12][0], 2, [
                fich[13][:2],
                fich[14][:2],
            ])

            dc1.add_page_break()

    nombre_archivo = "Proyecto.docx"
    response = HttpResponse(content_type="application/msword")
    contenido = "attachment; filename= {0}".format(nombre_archivo)
    response["Content-Disposition"] = contenido
    dc1.save(response)

    return response
예제 #51
0
def Doc(Pic_Address, Company, Invoice_num, Invoice_Date, Company_Address,
        Customer_Name, Customer_Address, Email, Phone, Detail_top,
        Customer_Phone, Product_List, Extra_Info, Discount, Amount):
    """Build an invoice from the ``default.docx`` template and save it.

    The document gets, in order: the picture(s) added by ``Pics``, a header
    paragraph (company / invoice number / date / contact lines), the
    customer lines, the product table filled from ``Product_List`` and a
    totals paragraph (sub total, discount, grand total, signature line).

    ``Invoice_num`` must be a string because it is concatenated into the
    file name. ``Amount`` and ``Discount`` must be convertible with
    ``float``; the grand total is ``float(Amount) - float(Discount)``.

    The result is written to ``Invoice/Invoice   <Invoice_num>.docx``
    relative to the working directory.
    """
    import os

    document = Document("default.docx")

    for section in document.sections:
        section.left_margin = Inches(0.75)
        section.right_margin = Inches(0.5)
        section.top_margin = Inches(0.4)
        section.bottom_margin = Inches(0.4)

    Pics(document, Pic_Address)

    # Header: company and invoice data.
    # NOTE(review): the Seperator/Full numbers look like character columns
    # used by Line_sep to align the values -- confirm against Line_sep.
    para = document.add_paragraph("")
    Line1 = Line_sep(Var1=Company,
                     Seperator=40,
                     Var2="Invoice",
                     Full=60,
                     Var3=Invoice_num)
    Line(para, Line1, Size=24, Bold=True, NextL=False)
    Line2 = Line_sep(Var1="",
                     Seperator=129,
                     Var2="Invoice Date",
                     Full=161,
                     Var3=Invoice_Date)
    Line(para, Line2, Size=11)
    Line(para, Company_Address, Size=11)
    Line(para, Line=Detail_top, Size=11)
    Line(para, Email, Size=11)
    Line(para, Phone, Size=11)

    # Customer
    Line3 = Line_sep(Var1="Bill To:",
                     Seperator=90,
                     Var2="Ship To:",
                     Full=None,
                     Var3="")
    Line(para, Line3, Size=11, Bold=True)
    Line(para, Line=Customer_Name, Size=13, Bold=True)
    Line(para, Line=Customer_Address, Size=11)
    Line(para, Line=Customer_Phone, Size=11)

    # Product table
    table = Create_table(document)
    Assign_table(table, Product_List)

    # Totals below the table
    para1 = document.add_paragraph("")
    Line(para1, Line=Extra_Info, Size=11, NextL=False)
    Sub_Total = Amount
    Line4 = Line_sep(Var1="",
                     Seperator=80,
                     Var2="Sub Total     ",
                     Full="SPECIAL",
                     Var3=str(Sub_Total))
    Line(para1, Line=Line4, Bold=True, Size=18)
    Line5 = Line_sep(Var1="",
                     Seperator=81,
                     Var2="Discount     ",
                     Full="SPECIAL",
                     Var3=Discount)
    Line(para1, Line=Line5, Bold=True, Size=18)
    Grand = float(Sub_Total) - float(Discount)
    Line6 = Line_sep(Var1="",
                     Seperator=57,
                     Var2="Grand Total   ",
                     Full="SPECIAL",
                     Var3=str(Grand))
    Line(para1, Line=Line6, Bold=True, Size=22)
    Line(para1, Line=" ", Size=12)
    line7 = "\n\nCustomer Signature                                                                                                                                     Signature"
    Line(para1, Line=line7, Size=12)

    # os.path.join yields the same "Invoice\..." path on Windows as the old
    # literal did, without its invalid "\I" escape, and a real sub-directory
    # path elsewhere. The directory is created when missing.
    os.makedirs("Invoice", exist_ok=True)
    document.save(os.path.join("Invoice", "Invoice   " + Invoice_num + ".docx"))
예제 #52
0
def readingFile(path1, path2, mode):
    """Compare two text files line by line and write a Word diff report.

    Lines are compared pairwise, word by word: each line is stripped,
    lower-cased when ``mode == 1``, and split on whitespace. For every pair
    of lines that differs, the report gets a heading and two bullet
    paragraphs (the line from file 1, then the line from file 2) with the
    mismatching words coloured red. Lines the longer file has beyond the
    shorter one are listed at the end. The report is saved as
    ``output_result.docx`` in the working directory.

    Args:
        path1: path of the first text file.
        path2: path of the second text file.
        mode: ``1`` for a case-insensitive comparison; any other value
            compares case-sensitively.

    Returns:
        ``100 - errors / total * 100``, where ``errors`` is the number of
        mismatching word positions plus the number of extra lines and
        ``total`` is the line count of the longer file. The value may be
        negative when lines contain several mismatching words. Two empty
        files yield ``100.0``.
    """
    from itertools import zip_longest

    red = RGBColor(0xff, 0x00, 0x00)
    document = Document()

    # Both files are read once, read-only, and closed immediately.
    print("calculating the length of file 1 : ")
    with open(path1, "r") as fil:
        lines1 = fil.readlines()

    os.system("cls")

    print("calculating the length of file 2 : ")
    with open(path2, "r") as fil1:
        lines2 = fil1.readlines()

    countfil = len(lines1)
    countfil1 = len(lines2)
    totalCompare = max(countfil, countfil1)
    # Only the lines both files have can be compared pairwise.
    fromLen = min(countfil, countfil1)
    errorCompare = 0

    os.system("cls")

    print("starting to compare : ")

    for count, (raw1, raw2) in enumerate(zip(lines1, lines2), start=1):
        print("on line {} outOf {}".format(count, fromLen))

        string1 = raw1.strip()
        string2 = raw2.strip()
        if mode == 1:
            string1 = string1.lower()
            string2 = string2.lower()

        # Pair the words by position; the shorter line is padded with " ",
        # which can never equal a real word (split() yields no whitespace).
        pairs = list(zip_longest(string1.split(), string2.split(),
                                 fillvalue=" "))
        mismatches = sum(1 for left, right in pairs if left != right)
        if mismatches == 0:
            continue

        document.add_heading("Error on line " + str(count) + " : ", level=1)
        # First bullet shows the line of file 1, second the line of file 2.
        for side in (0, 1):
            p = document.add_paragraph(style='List Bullet')
            for pair in pairs:
                run = p.add_run(str(pair[side]) + " ")
                if pair[0] != pair[1]:
                    run.font.color.rgb = red

        # Each mismatching position counts once. Previously it was counted
        # twice and the running total was halved after every line, which
        # also halved the errors of all earlier lines again and again.
        errorCompare += mismatches

    os.system("cls")

    print("outputting extra lines founded : ")

    if countfil != countfil1:
        if countfil > countfil1:
            extra_lines = lines1[fromLen:]
            file_label = " in txt file no1 : "
        else:
            extra_lines = lines2[fromLen:]
            file_label = " in txt file no2 : "
        stringHeading = ("these are the extra lines after the " + str(fromLen)
                         + file_label)
        document.add_heading(stringHeading, level=1)
        p = document.add_paragraph()
        for line in extra_lines:
            p.add_run(line.strip())
            p.add_run("\n")
        errorCompare += len(extra_lines)

    document.save('output_result.docx')

    if totalCompare == 0:
        # Two empty files are identical; avoid dividing by zero.
        return 100.0

    percentageMatched = 100 - ((errorCompare / totalCompare) * 100)

    return percentageMatched
예제 #53
0
from docx import Document
from docx.shared import Inches

# Minimal python-docx walkthrough: headings, runs, styled paragraphs, a
# picture, a table and a page break, saved as 'hello.docx'.
document = Document()  # create the document
document.add_heading('Document Title', 0)  # level 0 is the document title

p = document.add_paragraph('A plain paragraph having some')  # add a text paragraph
p.add_run('bold').bold = True  # add a bold run
p.add_run(' and some ')
p.add_run('italic.').italic = True  # add an italic run

document.add_heading('Heading, level 1', level=1)
# Styles are referenced by their UI name ('List Bullet'); looking them up by
# style id ('ListBullet') is deprecated in python-docx and emits a warning.
document.add_paragraph('Intense quote', style='Intense Quote')  # paragraph with a named style
document.add_paragraph('first item in unordered list', style='List Bullet')
document.add_paragraph('fisrt item in ordered list', style='List Number')
document.add_picture('python_logo.gif', width=Inches(1.25))  # add a picture 1.25 inches wide

table = document.add_table(rows=1, cols=3)  # create a table with one row and three columns
hdr_cells = table.rows[0].cells  # cells of the first (header) row
hdr_cells[0].text = 'Qty'  # column captions
hdr_cells[1].text = 'Id'
hdr_cells[2].text = 'Desc'
for item in range(3):
    row_cells = table.add_row().cells  # append a new row
    row_cells[0].text = str(item)  # fill each column
    row_cells[1].text = str(item * 100)
    row_cells[2].text = str(item) + '10'

document.add_page_break()  # add a page break at the end of the document
document.save('hello.docx')  # save the document
예제 #54
0
        #extract the variables
        name = values["name"]
        address = values["address"]
        salutation = values["salutation"]
        bodytext = values["bodytext"]
        signature = values["signature"]
        now = datetime.utcnow()
        todays_date = now.strftime('%Y-%m-%d')

        #prepare the letter
        try:
            document = Document('letterhead.docx')
        except:
            document = Document()

        p = document.add_paragraph(todays_date)
        p = document.add_paragraph(" ")
        p = document.add_paragraph(" ")
        p = document.add_paragraph(name + '\n'  + address )
        p = document.add_paragraph(" ")
        p = document.add_paragraph(" ")
        p = document.add_paragraph(salutation + " " + name + ",")
        p = document.add_paragraph(" ")
        p = document.add_paragraph(bodytext)
        p = document.add_paragraph(" ")
        p = document.add_paragraph(signature)

        document.save(name + " - " + todays_date +".docx")
        window['outputline'].update('Created ' + name + ' ' + todays_date)

    except Exception as e:
예제 #55
0
def saveToDocx(data, fileName):
    """Write the scraped data of one page to ``<fileName>.docx``.

    ``data`` is indexed with the keys ``email``, ``uuid``, ``url``,
    ``title`` (only its first element is used) and, for the listed
    sections, ``link``, ``span``, ``paragraph``, ``heading 1`` to
    ``heading 4`` and ``other``. Each section becomes a level-3 heading
    followed by one paragraph per item.
    """
    # (heading text, key in `data`) for every list-valued section, in
    # output order.
    list_sections = (
        ("HyperLink's", "link"),
        ("Span's", "span"),
        ("Paragraph's", "paragraph"),
        ("Heading's 1", "heading 1"),
        ("Heading's 2", "heading 2"),
        ("Heading's 3", "heading 3"),
        ("Heading's 4", "heading 4"),
        ("Other's", "other"),
    )

    report = Document()

    report.add_heading("Email: " + data["email"])
    report.add_paragraph("UUID: " + data["uuid"])
    report.add_heading("Page Url:" + data["url"], level=2)

    report.add_heading("Title", level=3)
    report.add_paragraph(data["title"][0])

    for caption, key in list_sections:
        report.add_heading(caption, level=3)
        for entry in data[key]:
            report.add_paragraph(entry)

    report.save(fileName + ".docx")
class Transformer:
    """Build a question paper and a matching answer key from a workbook.

    Every sheet of the Excel file contributes one titled block. For each
    selected row, column 1 is used as the question text, columns 2-5 as the
    options a) to d) and column 6 as the answer (written lower-cased to the
    answer key only).
    """

    def __init__(self, filename='no_file', count=None):
        """Create the two empty documents and set the base text style.

        Args:
            filename: path of the workbook read by ``initiate``.
            count: number of questions wanted per sheet, indexed by sheet
                position, so it needs one entry per sheet. Defaults to
                ``[10]``.
        """
        self.document = Document()
        self.answer = Document()
        self.filename = filename
        # A fresh list per instance: a list literal as the parameter default
        # would be one object shared by every Transformer.
        self.count = [10] if count is None else count

        # Only the question paper's 'Normal' style is customised.
        style = self.document.styles['Normal']
        font = style.font
        font.name = 'Times New Roman'
        font.size = Pt(12)

        paragraph_format = style.paragraph_format
        paragraph_format.space_before = 0
        paragraph_format.space_after = 0
        paragraph_format.line_spacing = 1.15

    def initiate(self):
        """Fill both documents from the workbook.

        Row 0 of each sheet is never used. When a sheet has no more rows
        than requested, rows 1..n-1 are used in order; otherwise the
        requested number of rows is drawn with ``random.sample``.

        Returns:
            The tuple ``(question document, answer document)``.
        """
        workbook = xlrd.open_workbook(self.filename)
        sheets = zip(workbook.sheet_names(), workbook.sheets())
        for idx, (title, sheet) in enumerate(sheets):
            total_rows = sheet.nrows
            total_required = self.count[idx]
            if total_required >= total_rows:
                rows = range(1, total_rows)
            else:
                rows = random.sample(range(1, total_rows), total_required)

            for target in (self.document, self.answer):
                target.add_paragraph().add_run(title).bold = True

            for i in rows:
                self._add_question(self.document, sheet, i)
                self.document.add_paragraph(style='List')

                self._add_question(self.answer, sheet, i)
                solution = self.answer.add_paragraph()
                solution.add_run(
                    "Answer: " + sheet.cell(i, 6).value.lower()).bold = True
                self.answer.add_paragraph(style='List')
        return self.document, self.answer

    @staticmethod
    def _add_question(doc, sheet, row):
        """Append the numbered question of ``row`` and its four options."""
        # 'List Number' is the style's UI name; the style id 'ListNumber'
        # still resolves in python-docx but is deprecated.
        question = doc.add_paragraph(style='List Number')
        question.add_run(sheet.cell(row, 1).value).bold = True
        for label, column in zip('abcd', range(2, 6)):
            doc.add_paragraph(
                label + ') ' + str(sheet.cell(row, column).value),
                style='List')
예제 #57
0
 def create(self, data=None):
     """Write one Word document per user profile into ``C:/LinkedinDocs``.

     Each item of ``data`` is a mapping. The keys read are
     ``personal_info`` (name, headline, phone, email, summary),
     ``experiences`` (``jobs`` and ``education`` lists) and ``skills``.
     A missing or empty section is rendered with placeholder text or left
     empty instead of aborting the whole run. The file is named after the
     user's name with every non-letter replaced by a space.
     """
     os.makedirs('C:/LinkedinDocs', exist_ok=True)
     for user in data or []:
         # Fall back to empty containers: the previous string fallbacks
         # ("Error 1", ...) were then called with .get() or iterated and
         # crashed on the first incomplete profile.
         personal = user.get('personal_info') or {}
         experiences = user.get('experiences') or {}
         name = personal.get('name', "Error 2")

         document = Document()
         document.add_heading(name)
         document.add_heading(personal.get('headline', "Error 4"), level=2)

         # Phone and e-mail side by side in a one-row table.
         table = document.add_table(rows=1, cols=2)
         table.cell(0, 0).text = str(personal.get('phone', 'Phone error'))
         table.cell(0, 1).text = str(personal.get('email', 'email Error'))

         document.add_paragraph(personal.get('summary', "Error 5"))

         document.add_heading("Experience", level=2)
         for job in experiences.get('jobs') or []:
             document.add_paragraph(job.get('title'), style='List Bullet')
             document.add_paragraph(job.get('date_range'))
             document.add_paragraph(
                 str(job.get('description')).replace("\n", "").strip())

         document.add_heading("Skills", level=2)
         for skill in user.get('skills') or []:
             document.add_paragraph(skill.get('name'), style='List Bullet')

         document.add_heading("Education", level=2)
         for edu in experiences.get('education') or []:
             document.add_paragraph(str(edu.get('field_of_study')) + '\t' +
                                    str(edu.get('date_range')),
                                    style='List Bullet')
             document.add_paragraph(edu.get('degree', ''))
             document.add_paragraph(edu.get('name', "None"))

         # File name: letters only, runs of spaces collapsed.
         result = re.sub(r'[^a-zA-Z]', " ", str(name))
         result = re.sub(' +', ' ', result)
         document.save('C:/LinkedinDocs/' + result.strip() + '.docx')
예제 #58
0
# Open a new, empty document.
document = Document()

# Headings
# In anything but the shortest document, body text is divided into
# sections, each of which starts with a heading. add_heading() uses
# level=1 unless told otherwise.
document.add_heading('The REAL meaning of the universe', level=1)
document.add_heading('The role of dolphins', level=2)

# Paragraphs
paragraph = document.add_paragraph('Lorem ipsum dolor sit amet.')

# Text passed to add_paragraph() is put into a single run; further runs are
# appended to the paragraph with add_run().
paragraph = document.add_paragraph('Add more text ')
paragraph.add_run('by .add_run() method.')

# Bold and italic are set per run.
paragraph = document.add_paragraph('Applying ')
paragraph.add_run('bold ').bold = True
paragraph.add_run('and ')
paragraph.add_run('italic.').italic = True

# Applying a character style
예제 #59
0
    section.left_margin = Inches(1)
    section.right_margin = Inches(1)
    section.page_width = Inches(6.25)
    section.page_height = Inches(9.25)

# Fill the book with 410 pages of random text: 46 lines of `width` random
# characters per page, two spacer paragraphs, then the page number on the
# left for odd pages and right-aligned for even pages.
letters = list("abcdefghijklmnopqrstuvwxyz .,")
length = len(letters)
width = 56

odd = True
for i in range(410):
    num = str(i + 1)
    print('Page ' + num)
    for j in range(46):
        line = ''.join(letters[int(random() * length)] for _ in range(width))
        doc.add_paragraph(line)
    doc.add_paragraph(' ')
    doc.add_paragraph(' ')
    # Left-aligned number on odd pages, padded to the line width on even ones.
    doc.add_paragraph(num if odd else num.rjust(width))
    odd = not odd

doc.save('out/book.docx')

# Generate hard cover

from PIL import Image, ImageFont, ImageDraw

size = (2956, 2100)
예제 #60
0
def CreateWordListDocument(
    inputFile,
    outputFile=None,
):
    """Create a Word vocabulary sheet from the words found in a text file.

    The words come from ``generateWordList(inputFile)`` and each is looked
    up with ``lookupword``. The document holds a four-column table (word,
    phonetic, part of speech, acceptation) with one row per part of speech,
    where the word and phonetic cells of one word are merged vertically.
    It is followed by a section with the example sentences of every word
    that has any.

    Args:
        inputFile: path of the text file containing the vocabulary.
        outputFile: path of the document to write. Defaults to the input
            path with its extension replaced by ``.docx``.

    Returns:
        The number of words in the list, or ``None`` (nothing is written)
        when the list is empty.
    """
    folder_path, shortname = os.path.split(inputFile)
    basename, _extension = os.path.splitext(shortname)
    if outputFile is None:
        outputFile = os.path.join(folder_path, basename + r'.docx')

    # Process the text data, split the English and Chinese
    # sentences/vocabulary.
    word_list = generateWordList(inputFile)
    num = len(word_list)
    if num == 0:
        print(
            f"\n[Error]: There is no vocabulary in the text file {inputFile}, abort! \n"
        )
        return None

    title = (basename + " " + "word list").title()
    sent_title = (basename + " " + "sentences examples").title()
    sentences_list = []

    page_margin = Cm(1.27)
    # A4 sheet; which side is the width depends on the orientation.
    a4_long_side = Cm(29.7)
    a4_short_side = Cm(21)
    # Column order: word, phonetic, part of speech, acceptation.
    column_widths = (Cm(3.5), Cm(3.5), Cm(2), Cm(9.4))
    header_labels = ("Word", "Phonetic", "POS", "Acceptation")
    centered = WD_ALIGN_VERTICAL.CENTER

    # Init the document and its base font (with the East-Asian fallback).
    document = Document()
    style = document.styles['Normal']
    style.font.name = 'Tahoma'
    style.element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')
    style.font.size = Pt(12)

    # Page size and margins.
    for section in document.sections:
        section.orientation = WD_ORIENT.PORTRAIT  # change here for landscape
        landscape = section.orientation == WD_ORIENT.LANDSCAPE
        section.page_width = a4_long_side if landscape else a4_short_side
        section.page_height = a4_short_side if landscape else a4_long_side

        section.top_margin = page_margin
        section.bottom_margin = page_margin
        section.left_margin = page_margin
        section.right_margin = page_margin

    # Title
    document.add_heading(title, 0)

    # Vocabulary table with its header row.
    table = document.add_table(1, 4)
    table.alignment = WD_TABLE_ALIGNMENT.CENTER
    table.style = 'Table Grid'
    table.autofit = False
    table.allow_autofit = False

    header_cells = table.rows[0].cells
    for cell, label, col_width in zip(header_cells, header_labels,
                                      column_widths):
        cell.text = label
        cell.width = col_width
    for cell in header_cells:
        run = cell.paragraphs[0].runs[0]
        run.font.bold = True
        run.font.size = Pt(12)

    for word_i in tqdm(word_list,
                       desc="Lookup words",
                       total=len(word_list),
                       unit="words"):
        lookup_result = lookupword(word_i)
        if lookup_result is None:
            continue
        sentences_list.append({
            "word": lookup_result['word'],
            "sent": lookup_result['sent']
        })

        # One table row per part of speech of the word.
        for ss in range(len(lookup_result['pos'])):
            table.add_row()
            current_row = len(table.rows) - 1
            row_cells = table.rows[current_row].cells
            row_texts = (
                lookup_result['word'],
                lookup_result['phonetic'],
                lookup_result['pos'][ss],
                lookup_result['acceptation'][ss],
            )
            for cell, text, col_width in zip(row_cells, row_texts,
                                             column_widths):
                cell.text = text
                cell.vertical_alignment = centered
                cell.width = col_width

            if ss == 0:
                continue

            # Further rows of the same word: merge the word and phonetic
            # cells with the row above (each column separately) ...
            for col in range(2):
                table.cell(current_row, col).merge(
                    table.cell(current_row - 1, col))
            # ... then write text and formatting of the merged cells again.
            for col, key in ((0, 'word'), (1, 'phonetic')):
                merged = table.cell(current_row, col)
                merged.text = lookup_result[key]
                merged.vertical_alignment = centered
                merged.width = column_widths[col]

    for column, col_width in zip(table.columns, column_widths):
        column.width = col_width

    # Example sentences
    if len(sentences_list):
        document.add_heading("\n\n" + sent_title, 0)

        for index, result in enumerate(sentences_list):
            word = result['word']
            sentences = result['sent']
            if sentences is None or not len(sentences):
                continue
            document.add_heading(f"{index+1:>3}. " + word, 1)
            for s_i, sent in enumerate(sentences):
                # Collapse multi-line strings: English lines are joined with
                # a space, Chinese lines without a separator.
                English = " ".join(sent.orig.string.splitlines())
                Chinese = "".join(sent.trans.string.splitlines())

                Example = f"({(s_i+1):>2} ). " + English + "\n"
                # Add the English sentence, then the translation as a run.
                paragraph = document.add_paragraph(Example, style='List')
                paragraph.add_run(Chinese)
                paragraph.paragraph_format.space_before = Pt(10)

    document.save(outputFile)
    print(
        f"\nVocabulary translation document {outputFile} generated successfully!\n"
    )
    return num