def handler(tag):
    """Build an Employee from one table row, or return None to skip the row.

    Cell 0 supplies the name (and, if it contains a link, the url), cell 2
    the title and cell 3 the departments.

    Returns None when the row has no <td> cells, has fewer than four cells,
    or is the header row whose first cell reads u'姓名'.
    """
    cells = tag.find_all(name='td')
    if not cells:
        print("len(tds) == 0")
        return None

    employee = Employee()
    cell_count = len(cells)
    if cell_count < 4:
        print("len(tds) = %d" % cell_count)
        return None

    name_cell = cells[0]
    employee.name = name_cell.get_text().strip()
    # The header row carries the column caption instead of a person's name.
    if employee.name == u'姓名':
        return None

    links = name_cell.find_all('a')
    if links:
        employee.url = links[0]['href']

    employee.title = cells[2].get_text().strip()
    employee.departments = cells[3].get_text().strip()
    return employee
def handler(tag):
    """Build an Employee from a table row that has five or more <td> cells.

    With exactly five cells the name sits in cell 0; with more than five it
    sits in cell 1.  The title, email and telephone are read from the three
    cells that follow the name cell, in that order.

    Returns None when the row has no cells, fewer than five cells, or is the
    header row whose name cell reads u'姓名'.
    """
    cells = tag.find_all(name='td')
    if not cells:
        print("len(tds) == 0")
        return None

    employee = Employee()
    if len(cells) < 5:
        print("len(tds) = %d" % (len(cells)))
        return None

    # len(cells) >= 5 here, so "not exactly five" means "more than five".
    name_idx = 0 if len(cells) == 5 else 1
    name_cell = cells[name_idx]

    employee.name = name_cell.get_text().strip()
    if employee.name == u'姓名':
        return None

    links = name_cell.find_all('a')
    if links:
        employee.url = links[0]['href']

    employee.title = cells[name_idx + 1].get_text().strip()
    employee.email = cells[name_idx + 2].get_text().strip()
    employee.tel = cells[name_idx + 3].get_text().strip()
    return employee
def handler(tag):
    """Build an Employee from a table row with exactly four <td> cells.

    Cell 0 is the name, cell 1 the title, cell 2 the telephone and cell 3
    the email.  All whitespace is removed from each value.  The url is the
    href of the first <a> found anywhere in the row.

    Returns None when the row does not have exactly four cells.
    """
    cells = tag.find_all("td")
    if not (cells and len(cells) == 4):
        return None

    employee = Employee()
    links = tag.find_all('a')
    if links:
        employee.url = links[0]['href']

    employee.name = ''.join(cells[0].get_text().strip().split())

    # (attribute, cell index) pairs; empty cells leave the attribute untouched.
    for attr, idx in (('title', 1), ('email', 3), ('tel', 2)):
        text = cells[idx].get_text()
        if text:
            setattr(employee, attr, ''.join(text.split()))
    return employee
def handler(tag):
    """Build an Employee from one table row, or return None to skip the row.

    Cell 0 supplies the name (and, if it contains a link, the url), cell 2
    the title and cell 3 the departments.

    Returns None when the row has no <td> cells, has fewer than four cells,
    or is the header row whose first cell reads u'姓名'.
    """
    cells = tag.find_all(name='td')
    if not cells:
        print("len(tds) == 0")
        return None

    employee = Employee()
    cell_count = len(cells)
    if cell_count < 4:
        print("len(tds) = %d" % cell_count)
        return None

    name_cell = cells[0]
    employee.name = name_cell.get_text().strip()
    # The header row carries the column caption instead of a person's name.
    if employee.name == u'姓名':
        return None

    links = name_cell.find_all('a')
    if links:
        employee.url = links[0]['href']

    employee.title = cells[2].get_text().strip()
    employee.departments = cells[3].get_text().strip()
    return employee
def handler(tag):
    """Build an Employee from a table row that has five or more <td> cells.

    With exactly five cells the name sits in cell 0; with more than five it
    sits in cell 1.  The title, email and telephone are read from the three
    cells that follow the name cell, in that order.

    Returns None when the row has no cells, fewer than five cells, or is the
    header row whose name cell reads u'姓名'.
    """
    cells = tag.find_all(name='td')
    if not cells:
        print("len(tds) == 0")
        return None

    employee = Employee()
    if len(cells) < 5:
        print("len(tds) = %d" % (len(cells)))
        return None

    # len(cells) >= 5 here, so "not exactly five" means "more than five".
    name_idx = 0 if len(cells) == 5 else 1
    name_cell = cells[name_idx]

    employee.name = name_cell.get_text().strip()
    if employee.name == u'姓名':
        return None

    links = name_cell.find_all('a')
    if links:
        employee.url = links[0]['href']

    employee.title = cells[name_idx + 1].get_text().strip()
    employee.email = cells[name_idx + 2].get_text().strip()
    employee.tel = cells[name_idx + 3].get_text().strip()
    return employee
def handler(tag):
    """Build an Employee from a teacher block and let ProfileParser fill it in.

    The name comes from the first <div class="teacher-title"> (all whitespace
    removed) when such a div exists.  The element's stripped text fragments
    are then handed to ProfileParser, and whatever its parse() returns is
    returned to the caller.
    """
    employee = Employee()

    title_divs = tag.find_all("div", class_="teacher-title")
    if title_divs:
        employee.name = ''.join(title_divs[0].get_text().split())

    # Treat the rest of the block as plain text.
    text_fragments = tag.stripped_strings
    return ProfileParser(lines=text_fragments, employee=employee).parse()
def handler(tag):
    """Build an Employee from a teacher block and let ProfileParser fill it in.

    The name comes from the first <div class="teacher-title"> (all whitespace
    removed) when such a div exists.  The element's stripped text fragments
    are then handed to ProfileParser, and whatever its parse() returns is
    returned to the caller.
    """
    employee = Employee()

    title_divs = tag.find_all("div", class_="teacher-title")
    if title_divs:
        employee.name = ''.join(title_divs[0].get_text().split())

    # Treat the rest of the block as plain text.
    text_fragments = tag.stripped_strings
    return ProfileParser(lines=text_fragments, employee=employee).parse()
def handler(tag):
    """Build an Employee from a block containing two kinds of links.

    The first <a class="orangea"> supplies the name (whitespace removed) and
    the profile href.  If an <a class="black01"> exists, its stripped text
    fragments are passed to ProfileParser and the parser's result replaces
    the employee.
    """
    employee = Employee()

    name_links = tag.find_all('a', class_="orangea")
    if name_links:
        name_link = name_links[0]
        employee.name = ''.join(name_link.get_text().split())
        employee.profile = name_link['href']

    detail_links = tag.find_all('a', class_="black01")
    if detail_links:
        fragments = detail_links[0].stripped_strings
        employee = ProfileParser(lines=fragments, employee=employee).parse()

    return employee
def handler(tag):
    """Build an Employee from a block containing two kinds of links.

    The first <a class="orangea"> supplies the name (whitespace removed) and
    the profile href.  If an <a class="black01"> exists, its stripped text
    fragments are passed to ProfileParser and the parser's result replaces
    the employee.
    """
    employee = Employee()

    name_links = tag.find_all('a', class_="orangea")
    if name_links:
        name_link = name_links[0]
        employee.name = ''.join(name_link.get_text().split())
        employee.profile = name_link['href']

    detail_links = tag.find_all('a', class_="black01")
    if detail_links:
        fragments = detail_links[0].stripped_strings
        employee = ProfileParser(lines=fragments, employee=employee).parse()

    return employee
def handler(tag):
    """Build an Employee from the <a> elements inside ``tag``.

    The first link gives the name (its ``.string``, stripped) and the url
    (its href).  When a second link exists, its ``.string`` with all
    whitespace removed becomes the email.

    Returns None when there are no links or the first link has no string.
    """
    links = tag.find_all(name='a')
    if not links:
        return None

    employee = Employee()

    if len(links) >= 2:
        employee.email = links[1].string
        if employee.email:
            employee.email = ''.join(employee.email.split())

    # ``links`` is non-empty past the guard above, so a first link exists.
    first_link = links[0]
    employee.name = first_link.string
    if not employee.name:
        return None
    employee.name = employee.name.strip()
    employee.url = first_link['href']
    return employee
def handler(tag):
    """Build an Employee from a link whose text is "name" or "name:title".

    Example link text: 刘全勇: 硕士生导师

    All whitespace is removed from the link text, which is then split on
    u':'.  One part is taken as the name; two parts as name and title.  The
    url is the link's href.

    Returns None when the text splits into more than two parts.
    """
    employee = Employee(url=tag['href'])

    # get_text() rather than .string: .string is None as soon as the link has
    # more than one child node, which made the original .split() call raise
    # AttributeError.
    text = ''.join(tag.get_text().split())
    parts = text.split(u':')
    if len(parts) > 2:
        return None

    employee.name = parts[0]
    if len(parts) == 2:
        employee.title = parts[1]

    print("name:" + employee.name)
    return employee
def handler(tag):
    """Build an Employee from a link whose text is "name" or "name:title".

    Example link text: 刘全勇: 硕士生导师

    All whitespace is removed from the link text, which is then split on
    u':'.  One part is taken as the name; two parts as name and title.  The
    url is the link's href.

    Returns None when the text splits into more than two parts.
    """
    employee = Employee(url=tag['href'])

    compact_text = ''.join(tag.get_text().split())
    parts = compact_text.split(u':')
    part_count = len(parts)

    # str.split always yields at least one part, so only "too many" is invalid.
    if part_count not in (1, 2):
        return None

    employee.name = parts[0]
    if part_count == 2:
        employee.title = parts[1]

    print("name:" + employee.name)
    return employee
def handler(tag):
    """Build an Employee from a table row with exactly five <td> cells.

    Cell 0 is the name, cell 3 the title and cell 4 the research field.  The
    url is the href of the first <a> found anywhere in the row.  The title is
    echoed to stdout.

    Returns None when the row does not have exactly five cells, the name
    cell is empty, or the row is the header (name reads u'姓名').
    """
    cells = tag.find_all(name='td')
    if not cells or len(cells) != 5:
        return None

    raw_name = cells[0].get_text()
    if not raw_name:
        return None

    employee = Employee()
    employee.name = ''.join(raw_name.split())
    if employee.name == u'姓名':
        return None

    links = tag.find_all('a')
    if links:
        employee.url = links[0]['href']

    title = cells[3].get_text()
    if title:
        title = ''.join(title.split())
        # NOTE(review): as written this replace is a no-op; presumably one
        # side was a full-width comma.  Confirm the intended direction.
        title = title.replace(',', ',')
        employee.title = title
        # Function-call form works on both Python 2 and Python 3; the bare
        # ``print title`` statement is a SyntaxError on Python 3.
        print(title)

    research = cells[4].get_text()
    if research:
        # NOTE(review): same no-op comma replace as for the title above.
        employee.research = research.strip().replace(',', ',')

    return employee
def handler(tag):
    """Build an Employee from a table row with exactly five <td> cells.

    Cell 0 is the name, cell 3 the title and cell 4 the research field.  The
    url is the href of the first <a> found anywhere in the row.  The title is
    echoed to stdout.

    Returns None when the row does not have exactly five cells, the name
    cell is empty, or the row is the header (name reads u'姓名').
    """
    cells = tag.find_all(name='td')
    if not cells or len(cells) != 5:
        return None

    raw_name = cells[0].get_text()
    if not raw_name:
        return None

    employee = Employee()
    employee.name = ''.join(raw_name.split())
    if employee.name == u'姓名':
        return None

    links = tag.find_all('a')
    if links:
        employee.url = links[0]['href']

    title = cells[3].get_text()
    if title:
        title = ''.join(title.split())
        # NOTE(review): as written this replace is a no-op; presumably one
        # side was a full-width comma.  Confirm the intended direction.
        title = title.replace(',', ',')
        employee.title = title
        # Function-call form works on both Python 2 and Python 3; the bare
        # ``print title`` statement is a SyntaxError on Python 3.
        print(title)

    research = cells[4].get_text()
    if research:
        # NOTE(review): same no-op comma replace as for the title above.
        employee.research = research.strip().replace(',', ',')

    return employee
def profile_handler(doc, name, url, path):
    """Parse a personal profile page and save its main section to disk.

    The element with id "s2_right_con" is located in ``doc``.  If it is
    missing, an Employee carrying only ``name`` and ``url`` is returned.
    Otherwise the element's prettified HTML is written to
    ``path + name + ".html"`` and a new Employee is filled from it: the name
    from the element's <h4>, and every other field from direct children
    whose text contains one of the known labels.

    Args:
        doc: HTML of the profile page.
        name: Employee name; used for the output file name and the fallback.
        url: Profile url; only used for the fallback Employee.
        path: Prefix prepended to the output file name as-is (no separator
            is inserted).

    Returns:
        The populated Employee.
    """
    # Label found in the page text -> Employee attribute receiving the text
    # that follows the label.
    # Available Employee attributes: url, name, email, tel, title, profile,
    # research, departments, fax, addr.
    symbols = {
        u'Email:': 'email',
        u'邮箱:': 'email',
        u'电子邮件:': 'email',
        u'电子邮箱:': 'email',
        u'电话:': 'tel',
        u'联系电话:': 'tel',
        u'Tel:': 'tel',
        u'办公电话:': 'tel',
        u'传真:': 'fax',
        u'URL:': 'url',
        u'职称:': 'title',
    }

    # The pages are too irregular to parse fully: keep the name and homepage,
    # and store the raw profile section in a separate file.
    soup = BeautifulSoup(doc, Config.SOUP_PARSER)
    sections = soup.find_all(id="s2_right_con", limit=1)
    if not sections:
        return Employee(name=name, url=url)
    section = sections[0]

    filename = path + name + ".html"
    # The file is opened in binary mode, so encode explicitly; writing the
    # text returned by prettify() directly fails on Python 3 and on
    # non-ASCII content under Python 2.  ``with`` closes the file.
    with open(filename, 'wb') as fp:
        fp.write(section.prettify().encode('utf-8'))

    employee = Employee()

    heading = section.h4
    if heading:
        # get_text() instead of .string, which is None for nested markup.
        employee.name = heading.get_text().strip(' \t\n\r')
    else:
        print(heading)

    for child in section.children:
        if not child.string:
            continue
        text = child.string.strip(' \t\n\r')
        if not text:
            continue

        # ``attr`` (not ``name``) so the function parameter is not shadowed.
        for label, attr in symbols.items():
            idx = text.find(label)
            if idx == -1:
                continue
            value = text[idx + len(label):]
            if hasattr(employee, attr):
                setattr(employee, attr, value)
            else:
                print("no attr %s in employee" % attr)
            # Only the first matching label is applied per child.
            break

    return employee
def profile_handler(doc, name, url, path):
    """Parse a personal profile page and save its main section to disk.

    The element with id "s2_right_con" is located in ``doc``.  If it is
    missing, an Employee carrying only ``name`` and ``url`` is returned.
    Otherwise the element's prettified HTML is written to
    ``path + name + ".html"`` and a new Employee is filled from it: the name
    from the element's <h4>, and every other field from direct children
    whose text contains one of the known labels.

    Args:
        doc: HTML of the profile page.
        name: Employee name; used for the output file name and the fallback.
        url: Profile url; only used for the fallback Employee.
        path: Prefix prepended to the output file name as-is (no separator
            is inserted).

    Returns:
        The populated Employee.
    """
    # Label found in the page text -> Employee attribute receiving the text
    # that follows the label.
    # Available Employee attributes: url, name, email, tel, title, profile,
    # research, departments, fax, addr.
    symbols = {
        u'Email:': 'email',
        u'邮箱:': 'email',
        u'电子邮件:': 'email',
        u'电子邮箱:': 'email',
        u'电话:': 'tel',
        u'联系电话:': 'tel',
        u'Tel:': 'tel',
        u'办公电话:': 'tel',
        u'传真:': 'fax',
        u'URL:': 'url',
        u'职称:': 'title',
    }

    # The pages are too irregular to parse fully: keep the name and homepage,
    # and store the raw profile section in a separate file.
    soup = BeautifulSoup(doc, Config.SOUP_PARSER)
    sections = soup.find_all(id="s2_right_con", limit=1)
    if not sections:
        return Employee(name=name, url=url)
    section = sections[0]

    filename = path + name + ".html"
    # The file is opened in binary mode, so encode explicitly; writing the
    # text returned by prettify() directly fails on Python 3 and on
    # non-ASCII content under Python 2.  ``with`` closes the file.
    with open(filename, 'wb') as fp:
        fp.write(section.prettify().encode('utf-8'))

    employee = Employee()

    heading = section.h4
    if heading:
        # get_text() instead of .string, which is None for nested markup.
        employee.name = heading.get_text().strip(' \t\n\r')
    else:
        print(heading)

    for child in section.children:
        if not child.string:
            continue
        text = child.string.strip(' \t\n\r')
        if not text:
            continue

        # ``attr`` (not ``name``) so the function parameter is not shadowed.
        for label, attr in symbols.items():
            idx = text.find(label)
            if idx == -1:
                continue
            value = text[idx + len(label):]
            if hasattr(employee, attr):
                setattr(employee, attr, value)
            else:
                print("no attr %s in employee" % attr)
            # Only the first matching label is applied per child.
            break

    return employee
def handler(tag):
    """Build an Employee from a teacher block and let ProfileParser fill it in.

    When the block contains an <a class="dt_text_tit">, that link's
    ``.string`` is the name and its href is stored as both profile and url.
    Otherwise the first stripped text fragment of the block is the name.
    The text fragments are then passed to ProfileParser and the parser's
    result is returned.
    """
    employee = Employee()
    fragments = tag.stripped_strings

    title_links = tag.find_all(name="a", attrs={"class": "dt_text_tit"})
    if title_links:
        title_link = title_links[0]
        employee.name = title_link.string
        employee.profile = title_link["href"]
        employee.url = employee.profile
    else:
        # No title link: the first text fragment is the name.
        # NOTE(review): if stripped_strings is a one-shot generator, this
        # fragment is consumed before the parser sees the rest. Confirm.
        for first_fragment in fragments:
            employee.name = first_fragment
            break

    return ProfileParser(lines=fragments, employee=employee).parse()
def handler(tag):
    """Build an Employee from a teacher block and let ProfileParser fill it in.

    When the block contains an <a class="dt_text_tit">, that link's
    ``.string`` is the name and its href is stored as both profile and url.
    Otherwise the first stripped text fragment of the block is the name.
    The text fragments are then passed to ProfileParser and the parser's
    result is returned.
    """
    employee = Employee()
    fragments = tag.stripped_strings

    title_links = tag.find_all(name="a", attrs={"class": "dt_text_tit"})
    if title_links:
        title_link = title_links[0]
        employee.name = title_link.string
        employee.profile = title_link['href']
        employee.url = employee.profile
    else:
        # No title link: the first text fragment is the name.
        # NOTE(review): if stripped_strings is a one-shot generator, this
        # fragment is consumed before the parser sees the rest. Confirm.
        for first_fragment in fragments:
            employee.name = first_fragment
            break

    return ProfileParser(lines=fragments, employee=employee).parse()
def handler(tag):
    """Build an Employee from a card whose name element has class "handle".

    The name element looks like this (the detail page is opened by script):
    <span class="handle" onclick="toCardDetailAction('10c07e70-3fb6-42af-aa26-bfab26b6ce0406');" style="color:#2084D2;font-size: 16px;">艾明晶</span>

    The card id is cut out of the onclick attribute and appended to the
    detail-page url.  The block's stripped text fragments are then passed to
    ProfileParser and the parser's result is returned.

    Returns None when no element with class "handle" exists.
    """
    name_spans = tag.find_all(class_="handle")
    if not name_spans:
        return None
    name_span = name_spans[0]

    employee = Employee()
    employee.name = ''.join(name_span.get_text().split())

    # Drop the leading "toCardDetailAction('" and the trailing three characters.
    call_prefix = "toCardDetailAction('"
    card_id = name_span['onclick'][len(call_prefix):-3]
    detail_url = ('http://scse.buaa.edu.cn/buaa-css-web/toCardDetailAction.action'
                  '?firstSelId=CARD_TMPL_OF_FIRST_NAVI_CN%20&%20secondSelId='
                  'CARD_TMPL_OF_ALL_TEACHER_CN%20&cardId=')
    employee.url = detail_url + card_id
    print("card_id=[%s]" % card_id)

    return ProfileParser(lines=tag.stripped_strings, employee=employee).parse()
def handler(tag):
    """Build an Employee from a table row with exactly three <td> cells.

    Cell 0 is the name, cell 1 the title and cell 2 the email; all whitespace
    is removed from each value and the email is additionally passed through
    email_value_strip().

    Returns None when the row does not have exactly three cells or is the
    header row (name reads u'姓名').
    """
    cells = tag.find_all(name='td')
    if not (cells and len(cells) == 3):
        return None

    def compact(text):
        # Remove every whitespace character from ``text``.
        return ''.join(text.split())

    employee = Employee()
    employee.name = compact(cells[0].get_text() or '')

    # Filter out the table header.
    if employee.name == u'姓名':
        return None

    employee.title = compact(cells[1].get_text())
    employee.email = email_value_strip(compact(cells[2].get_text()))
    return employee
def handler(tag):
    """Build an Employee from a block holding at least four <li> items.

    Item 0 is the name (its link, if any, gives profile and url), item 1 the
    title, item 2 the telephone and item 3 the email.  For title, telephone
    and email the first ``len(u'职务:')`` characters are dropped before all
    whitespace is removed.

    Returns None when fewer than four <li> items are present.
    """
    items = tag.find_all('li')
    if len(items) < 4:
        return None

    employee = Employee()
    prefix_len = len(u'职务:')

    def without_prefix(text):
        # Drop the leading label (when there is text) and all whitespace.
        if text:
            text = text[prefix_len:]
        return ''.join(text.split())

    # NOTE(review): the original guarded the prefix removal for the name with
    # ``if not employee.name``, which can only ever slice an empty string, so
    # the name keeps its full text.  That behaviour is preserved here; confirm
    # whether the name item really has no label to strip.
    employee.name = ''.join(items[0].get_text().split())

    # The first item may lack a link; the original raised TypeError then.
    link = items[0].a
    href = link.get('href') if link is not None else None
    if href:
        employee.profile = href
        employee.url = employee.url or href

    employee.title = without_prefix(items[1].get_text())
    employee.tel = without_prefix(items[2].get_text())
    employee.email = without_prefix(items[3].get_text())

    print("name:" + employee.name + ",email:" + employee.email)
    return employee
def handler(tag):
    """Build an Employee from a link whose text is "name" or "name(title)".

    All whitespace is removed from the link text, closing parentheses are
    dropped and the text is split at the opening parenthesis.  The first part
    is the name; a second part, if present, is the title.  The url is the
    link's href.  Name and title are echoed to stdout.
    """
    employee = Employee()

    text = ''.join(tag.get_text().split())
    # Accept both ASCII and full-width parentheses.  As written, the original
    # replaced ')' twice and '(' by itself, so only the ASCII forms were
    # actually handled.
    text = text.replace(u'\uff08', u'(')
    text = text.replace(u'\uff09', u'').replace(u')', u'')
    parts = text.split(u'(')

    employee.name = parts[0]
    if len(parts) >= 2:
        employee.title = parts[1]
    employee.url = tag['href']

    # Single formatted argument prints "name title" on Python 2 and 3 alike;
    # the bare print statement is a SyntaxError on Python 3.
    print("%s %s" % (employee.name, employee.title))
    return employee
def handler(tag):
    """Build an Employee from a faculty card.

    The name is the text of the first <h3> with all whitespace removed and
    the title is the text of the first <span class="faculty-title">.  The
    <dt class="faculty-info"> elements supply, by position, the email (0),
    telephone (1), profile (3) and research field (4).

    Returns None when the card has no <h3> or fewer than five
    <dt class="faculty-info"> elements.
    """
    headings = tag.find_all(name='h3')
    if not headings:
        return None

    # Validate before building anything, so malformed cards are rejected
    # up front.
    info = tag.find_all(name='dt', class_="faculty-info")
    if not info or len(info) < 5:
        return None

    def compact(text):
        # Remove every whitespace character from ``text``.
        return ''.join(text.split())

    employee = Employee()
    employee.name = compact(headings[0].get_text() or '')

    # A card without a title span used to raise IndexError; leave the title
    # unset in that case instead.
    title_spans = tag.find_all(name="span", class_="faculty-title")
    if title_spans:
        employee.title = title_spans[0].get_text()

    employee.email = compact(info[0].get_text())
    employee.tel = compact(info[1].get_text())
    employee.profile = compact(info[3].get_text())
    # The research text keeps its internal whitespace.
    employee.research = info[4].get_text()
    return employee
def handler(tag):
    """Build an Employee from a faculty card.

    The name is the text of the first <h3> with all whitespace removed and
    the title is the text of the first <span class="faculty-title">.  The
    <dt class="faculty-info"> elements supply, by position, the email (0),
    telephone (1), profile (3) and research field (4).

    Returns None when the card has no <h3> or fewer than five
    <dt class="faculty-info"> elements.
    """
    headings = tag.find_all(name='h3')
    if not headings:
        return None

    # Validate before building anything, so malformed cards are rejected
    # up front.
    info = tag.find_all(name='dt', class_="faculty-info")
    if not info or len(info) < 5:
        return None

    def compact(text):
        # Remove every whitespace character from ``text``.
        return ''.join(text.split())

    employee = Employee()
    employee.name = compact(headings[0].get_text() or '')

    # A card without a title span used to raise IndexError; leave the title
    # unset in that case instead.
    title_spans = tag.find_all(name="span", class_="faculty-title")
    if title_spans:
        employee.title = title_spans[0].get_text()

    employee.email = compact(info[0].get_text())
    employee.tel = compact(info[1].get_text())
    employee.profile = compact(info[3].get_text())
    # The research text keeps its internal whitespace.
    employee.research = info[4].get_text()
    return employee
def update_employee(self, employee_id):
    """Update an employee from the submitted form and redirect to the list.

    The form must carry ``_method == 'PUT'``.  Fields missing from the form
    keep their current values, except ``is_active``, which is True only when
    the key is present in the form.

    Args:
        employee_id: Identifier of the employee to update.

    Returns:
        A redirect to the 'employees' endpoint after a successful update.

    Aborts with 405 when the form does not declare the PUT method, and with
    422 when the employee cannot be loaded, does not exist, or cannot be
    saved.
    """
    form = request.form

    if form.get('_method') != 'PUT':
        # Was ``app.logger.Info`` (capital I), which raises AttributeError.
        app.logger.info(
            'Cannot perform this action. Please contact administrator')
        abort(405)

    employee = Employee(id=employee_id)
    try:
        employee = employee.list_one_or_none_employee()
    except Exception:
        # Exception rather than BaseException, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        app.logger.exception(
            'An error occurred. No data with Employee ID = %s could be found!',
            employee_id)
        abort(422)

    if employee is None:
        app.logger.info(
            f'No data with Employee ID = {employee_id} could be found!')
        abort(422)

    employee.id = employee_id
    employee.name = form.get('name', employee.name)

    # The department field is split on ' - ' and its first part is stored as
    # the id.  A missing field keeps the current department instead of
    # failing on None.split().
    department_name = form.get('department_name')
    if department_name:
        employee.department_id = department_name.split(' - ', 1)[0]

    employee.title = form.get('title', employee.title)
    employee.emp_number = form.get('emp_number', employee.emp_number)
    employee.address = form.get('address', employee.address)
    employee.phone = form.get('phone', employee.phone)
    employee.wage = form.get('wage', employee.wage)
    employee.is_active = 'is_active' in form

    try:
        employee.update_employee_in_database()
    except Exception:
        # Was ``app.log.info``; the attribute used elsewhere in this
        # function is ``app.logger``.
        app.logger.exception(
            'An error occurred. Employee %s could not be updated!',
            employee_id)
        abort(422)

    flash(f'Employee {employee_id} was successfully updated!', 'success')
    return redirect(url_for('employees'))
def create_Employee(emp: createEmployee, db: Session = Depends(get_db)):
    """Create an Employee row from the request payload and commit it.

    Args:
        emp: Payload providing id, name, email, position, works_on and
            reporting_manager.
        db: Database session, injected through ``Depends(get_db)``.

    Returns:
        A dict with a "code" and a "messege" entry describing the result.
    """
    _employee = Employee()
    _employee.id = emp.id
    _employee.name = emp.name
    _employee.email = emp.email
    _employee.position = emp.position
    # These two lines used ':' instead of '=', which made them bare
    # annotations: nothing was assigned and both fields were never stored.
    _employee.works_on = emp.works_on
    _employee.reporting_manager = emp.reporting_manager

    db.add(_employee)
    db.commit()
    # Background_tasks.add_task(fetch_emp_data,employee.id)

    # The "messege" key and the message text are part of the response
    # callers already receive, so they are kept unchanged.
    return {
        "code": "Success",
        "messege": "Employee created the name" + emp.name,
    }
def create_employee_process():
    """Create an employee from the submitted form and send a notification.

    Reads name, phone, email and the four address fields (street, city,
    state, zipcode) from ``request.form``, then calls
    ``employee.create_employee()``.  On success an email is sent through
    ``employee.send_email`` and 'Success' is returned.

    Raises:
        Exception: when ``create_employee()`` returns a falsy value.
    """
    employee = Employee()
    form = request.form

    employee.name = form['name']
    employee.phone = form['phone']
    employee.email = form['email']

    address_fields = ('street', 'city', 'state', 'zipcode')
    employee.address = {field: form[field] for field in address_fields}

    if not employee.create_employee():
        raise Exception

    msg = Message()
    employee.send_email(mail, msg, app)
    return 'Success'
def handler(tag):
    """Build an Employee from a link, skipping pagination links.

    The stripped link string is the name and the href is the url.  If the
    name contains one of the keywords in PROFILE_TITLES, the text before the
    first matching keyword becomes the name and the rest becomes the title.

    Returns None when the link has no string or its text is one of the
    pagination captions.
    """
    # Pagination captions (first / next / last / previous page).  The entry
    # for the last page used to carry a trailing space and therefore never
    # matched, because the link text is stripped before the comparison.
    symbols = {u'首页', u'第一页', u'下一页', u'最后页', u'上一页'}

    if not tag.string:
        return None
    name = tag.string.strip()
    if name in symbols:
        return None

    employee = Employee(name=name, url=tag['href'])

    # Guess the title from the predefined keywords.
    for keyword in PROFILE_TITLES:
        idx = name.find(keyword)
        if idx != -1:
            employee.name = name[:idx]
            employee.title = name[idx:]
            break

    return employee
def handler(tag):
    """Build an Employee from a table row with exactly seven <td> cells.

    Cell 0 is the name (its first link, if any, gives the url), cell 2 the
    email, cell 3 the title and cell 6 the research field, in which newlines
    are replaced by '.'.  Name, email and title are echoed to stdout.

    Returns None when the row does not have exactly seven cells or is the
    header row (first cell reads u'姓名').
    """
    name_symbol = u'姓名'

    cells = tag.find_all('td')
    if len(cells) != 7:
        return None

    name_text = cells[0].get_text().strip()
    if name_text == name_symbol:
        return None

    employee = Employee()
    links = cells[0].find_all('a')
    if links:
        employee.url = links[0]['href']

    employee.name = name_text
    employee.email = cells[2].get_text().strip()
    employee.title = cells[3].get_text().strip()
    # str.replace returns a new string; the original discarded the result, so
    # the newlines were never replaced.
    employee.research = cells[6].get_text().strip().replace('\n', '.')

    # Single formatted argument prints the same text on Python 2 and 3; the
    # bare print statement is a SyntaxError on Python 3.
    print("%s %s %s" % (employee.name, employee.email, employee.title))
    return employee
def handler(tag):
    """Build an Employee from a table row with exactly seven <td> cells.

    Cell 0 is the name (its first link, if any, gives the url), cell 2 the
    email, cell 3 the title and cell 6 the research field, in which newlines
    are replaced by '.'.  Name, email and title are echoed to stdout.

    Returns None when the row does not have exactly seven cells or is the
    header row (first cell reads u'姓名').
    """
    name_symbol = u'姓名'

    cells = tag.find_all('td')
    if len(cells) != 7:
        return None

    name_text = cells[0].get_text().strip()
    if name_text == name_symbol:
        return None

    employee = Employee()
    links = cells[0].find_all('a')
    if links:
        employee.url = links[0]['href']

    employee.name = name_text
    employee.email = cells[2].get_text().strip()
    employee.title = cells[3].get_text().strip()
    # str.replace returns a new string; the original discarded the result, so
    # the newlines were never replaced.
    employee.research = cells[6].get_text().strip().replace('\n', '.')

    # Single formatted argument prints the same text on Python 2 and 3; the
    # bare print statement is a SyntaxError on Python 3.
    print("%s %s %s" % (employee.name, employee.email, employee.title))
    return employee
def handler(tag):
    """Build an Employee from a table row with exactly three <td> cells.

    Cell 0 is the name; when it contains '/', only the part before the first
    '/' is kept.  Cell 1 is the title.  The url is the href of the first <a>
    found anywhere in the row.

    Returns None when the row does not have exactly three cells, the name
    cell is empty, or the row is the header (name reads u'姓名').
    """
    cells = tag.find_all("td")
    if not cells or len(cells) != 3:
        return None

    name = cells[0].get_text()
    if not name:
        return None
    name = name.strip()
    if name == u'姓名':
        return None

    employee = Employee()
    employee.name = name.split('/')[0]
    # get_text() rather than .string: .string is None when the cell holds
    # more than one child node, which made .strip() raise AttributeError.
    employee.title = cells[1].get_text().strip()

    links = tag.find_all('a')
    if links:
        employee.url = links[0]['href']
    return employee
def handler(tag):
    """Build an Employee from a link whose text is "name" or "name title".

    Navigation and heading captions are skipped.  The link text is split on
    whitespace: with two or more tokens the last token is the title and the
    preceding tokens, joined without spaces, are the name; a single token is
    the name on its own.  The url is the link's href.

    Returns None when the link text is empty or is one of the skipped
    captions.
    """
    # Captions that are not people (navigation links, section headings, page
    # numbers).  Unicode literals so the comparison with the text returned by
    # get_text() also works on Python 2.
    symbols = {
        u'首页', u'尾页', u'师资队伍', u'教师简介', u'教授',
        u'上一页', u'下一页', u'[1]', u'[2]', u'1', u'2',
    }

    text = (tag.get_text() or '').strip()
    if not text or text in symbols:
        return None

    employee = Employee(url=tag['href'])

    # split() without an argument ignores runs of whitespace; split(' ')
    # produced empty tokens for consecutive spaces.
    tokens = text.split()
    if len(tokens) >= 2:
        # The original took tokens[1] as the title, which duplicated the
        # second token into the name and dropped the last one whenever there
        # were three or more tokens.
        employee.name = ''.join(tokens[:-1])
        employee.title = tokens[-1]
    else:
        # A single token used to end up as an empty or unset name.
        employee.name = tokens[0]
    return employee
def handler(tag):
    """Build an Employee from a link whose text is "name" or "name title".

    Navigation and heading captions are skipped.  The link text is split on
    whitespace: with two or more tokens the last token is the title and the
    preceding tokens, joined without spaces, are the name; a single token is
    the name on its own.  The url is the link's href.

    Returns None when the link text is empty or is one of the skipped
    captions.
    """
    # Captions that are not people (navigation links, section headings, page
    # numbers).  Unicode literals so the comparison with the text returned by
    # get_text() also works on Python 2.
    symbols = {
        u'首页', u'尾页', u'师资队伍', u'教师简介', u'教授',
        u'上一页', u'下一页', u'[1]', u'[2]', u'1', u'2',
    }

    text = (tag.get_text() or '').strip()
    if not text or text in symbols:
        return None

    employee = Employee(url=tag['href'])

    # split() without an argument ignores runs of whitespace; split(' ')
    # produced empty tokens for consecutive spaces.
    tokens = text.split()
    if len(tokens) >= 2:
        # The original took tokens[1] as the title, which duplicated the
        # second token into the name and dropped the last one whenever there
        # were three or more tokens.
        employee.name = ''.join(tokens[:-1])
        employee.title = tokens[-1]
    else:
        # A single token used to end up as an empty or unset name.
        employee.name = tokens[0]
    return employee
def profile_handler(doc, name, url, path):
    """Parse a personal profile page and save its main cell to disk.

    The first <td valign="top"> of ``doc`` is written, prettified, to
    ``path + name + ".html"``.  A new Employee is then filled in: the name
    from that cell's <h3>, the title from the <dt> of the page's first <dl>,
    further fields from <dl> children whose text contains a known label, and
    the url from the first link inside <div class="teachcontent">.

    Args:
        doc: HTML of the profile page.
        name: Employee name; used only for the output file name.
        url: Not used by this function.
        path: Prefix prepended to the output file name as-is (no separator
            is inserted).

    Returns:
        The populated Employee, or None when the page has no
        <td valign="top">.
    """
    # Employee attribute -> label that precedes its value in the page text.
    # Available Employee attributes: url, name, email, tel, title, profile,
    # research, departments, fax, addr.
    symbols = {
        'email': u'电子邮件:',
        'tel': u'办公电话:',
        'addr': u'办公地址:',
        'research': u'研究方向:',
    }

    soup = BeautifulSoup(doc, Config.SOUP_PARSER)
    cells = soup.find_all("td", attrs={"valign": "top"}, limit=1)
    if not cells:
        return None
    cell = cells[0]

    employee = Employee()

    # Save the raw section.  The file is opened in binary mode, so encode
    # explicitly; writing the text from prettify() directly fails on
    # Python 3 and on non-ASCII content under Python 2.
    filename = path + name + ".html"
    with open(filename, 'wb') as fp:
        fp.write(cell.prettify().encode('utf-8'))

    # Name.
    heading = cell.h3
    if heading:
        # get_text() instead of .string, which is None for nested markup.
        employee.name = heading.get_text().strip(' \t\n\r')
    else:
        print(heading)

    # Title and labelled fields.  A page without any <dl> used to crash on
    # ``dls.children``; it is now simply skipped.
    details = soup.dl
    if details is not None:
        if len(details) >= 1:
            print(details)
            if details.dt:
                employee.title = details.dt.string

        for child in details.children:
            if not child.string:
                continue
            text = child.string.strip(' \t\n\r')
            if not text:
                continue

            # ``attr`` (not ``name``) so the parameter is not shadowed.
            for attr, label in symbols.items():
                idx = text.find(label)
                if idx == -1:
                    continue
                value = text[idx + len(label):]
                if hasattr(employee, attr):
                    setattr(employee, attr, value)
                else:
                    print("no attr %s in employee" % attr)
                # Only the first matching label is applied per child.
                break

    # Profile link.
    teach_blocks = soup.find_all("div", class_="teachcontent", limit=1)
    if teach_blocks:
        link = teach_blocks[0].a
        if link:
            employee.url = link['href']

    return employee
def profile_handler(doc, name, url, path):
    """Parse a personal profile page and save its main cell to disk.

    The first <td valign="top"> of ``doc`` is written, prettified, to
    ``path + name + ".html"``.  A new Employee is then filled in: the name
    from that cell's <h3>, the title from the <dt> of the page's first <dl>,
    further fields from <dl> children whose text contains a known label, and
    the url from the first link inside <div class="teachcontent">.

    Args:
        doc: HTML of the profile page.
        name: Employee name; used only for the output file name.
        url: Not used by this function.
        path: Prefix prepended to the output file name as-is (no separator
            is inserted).

    Returns:
        The populated Employee, or None when the page has no
        <td valign="top">.
    """
    # Employee attribute -> label that precedes its value in the page text.
    # Available Employee attributes: url, name, email, tel, title, profile,
    # research, departments, fax, addr.
    symbols = {
        'email': u'电子邮件:',
        'tel': u'办公电话:',
        'addr': u'办公地址:',
        'research': u'研究方向:',
    }

    soup = BeautifulSoup(doc, Config.SOUP_PARSER)
    cells = soup.find_all("td", attrs={"valign": "top"}, limit=1)
    if not cells:
        return None
    cell = cells[0]

    employee = Employee()

    # Save the raw section.  The file is opened in binary mode, so encode
    # explicitly; writing the text from prettify() directly fails on
    # Python 3 and on non-ASCII content under Python 2.
    filename = path + name + ".html"
    with open(filename, 'wb') as fp:
        fp.write(cell.prettify().encode('utf-8'))

    # Name.
    heading = cell.h3
    if heading:
        # get_text() instead of .string, which is None for nested markup.
        employee.name = heading.get_text().strip(' \t\n\r')
    else:
        print(heading)

    # Title and labelled fields.  A page without any <dl> used to crash on
    # ``dls.children``; it is now simply skipped.
    details = soup.dl
    if details is not None:
        if len(details) >= 1:
            print(details)
            if details.dt:
                employee.title = details.dt.string

        for child in details.children:
            if not child.string:
                continue
            text = child.string.strip(' \t\n\r')
            if not text:
                continue

            # ``attr`` (not ``name``) so the parameter is not shadowed.
            for attr, label in symbols.items():
                idx = text.find(label)
                if idx == -1:
                    continue
                value = text[idx + len(label):]
                if hasattr(employee, attr):
                    setattr(employee, attr, value)
                else:
                    print("no attr %s in employee" % attr)
                # Only the first matching label is applied per child.
                break

    # Profile link.
    teach_blocks = soup.find_all("div", class_="teachcontent", limit=1)
    if teach_blocks:
        link = teach_blocks[0].a
        if link:
            employee.url = link['href']

    return employee