def orderbytime(self):
    # Sort notifications by the time the talk is held (most recent first),
    # keeping only records from the past year.
    self.session = DBSession()
    self.info_bytime = []
    temp = self.session.query(Notification).filter(
        and_(
            # held within the past year
            Notification.time >= datetime.datetime.now() - timedelta(days=365),
            # title must mention one of the keywords
            or_(Notification.title.like("%密码学%"),
                Notification.title.like("%信息安全%"),
                Notification.title.like("%security%"),
                Notification.title.like("%password%"))
        )).order_by(desc(Notification.time)).all()  # order by held time, not notify_time
    print("Sorted by the time the talk is held, most recent first:")
    for t in temp:
        t_dict = t.__dict__
        info = {
            'title': t_dict['title'],
            'speaker': t_dict['speaker'],
            'time': t_dict['time'],
            'venue': t_dict['venue'],
            'college': t_dict['college'],
            'url': t_dict['url'],
            'notify_time': t_dict['notify_time']
        }
        self.info_bytime.append(info)
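# For context, a minimal sketch of the Notification model the query above
# assumes. The column names come from the attributes accessed in
# orderbytime(); the table name and column types are assumptions, not the
# repo's actual model definition.
from sqlalchemy import Column, Integer, String, DateTime
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Notification(Base):
    __tablename__ = 'notifications'  # assumed table name

    id = Column(Integer, primary_key=True)
    title = Column(String)          # talk title, matched against keywords
    speaker = Column(String)
    time = Column(DateTime)         # when the talk is held
    venue = Column(String)
    college = Column(String)
    url = Column(String)
    notify_time = Column(DateTime)  # when the notice was published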
def __init__(self, seed, title_urls):
    self.session = DBSession()
    self.key_word = KeyWords()  # keywords used to match fields in the notice text
    self.seed = seed
    self.title_urls = title_urls
    self.urls = list(title_urls.values())
    self.information = {'title': self.key_word.title,
                        'speaker': self.key_word.speaker,
                        'time': self.key_word.time,
                        'venue': self.key_word.venue}
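# Hypothetical sketch of the KeyWords helper assumed above: all it needs to
# expose is title/speaker/time/venue attributes holding comma-separated
# match keywords. The defaults mirror the ones set in the crawler's
# __init__ below; the actual class lives elsewhere in the repo.
class KeyWords:
    def __init__(self):
        self.title = '报告题目:,学术报告:,题目,报告主题:,Title'
        self.speaker = '报告人:,主讲人:,汇报人:,Speaker'
        self.venue = '地点:,Address,Venue,Place'
        self.time = '日期:,时间:,Time'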
def __init__(self):
    self.process = CrawlerProcess(get_project_settings())
    self.db = DBSession()
    self.init_seed_data()  # seed the database with default crawl targets
    # Interactive override, kept for reference:
    # self.title_word = str(input('Enter the keywords that mark an academic-talk notice: '))
    # Default match keywords (comma-separated alternatives per field):
    self.title = '报告题目:,学术报告:,题目,报告主题:,Title'
    self.speaker = '报告人:,主讲人:,汇报人:,Speaker'
    self.venue = '地点:,Address,Venue,Place'
    self.time = '日期:,时间:,Time'
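# Illustrative only: one way such a comma-separated keyword string could be
# used to pull a field out of a line of notice text. The helper name
# extract_field is hypothetical, not part of the repo.
def extract_field(line, keywords):
    for kw in keywords.split(','):
        if kw in line:
            # take whatever follows the keyword as the field value
            return line.split(kw, 1)[1].strip()
    return None

# e.g. extract_field('报告题目:Lattice-based cryptography', '报告题目:,题目,Title')
# -> 'Lattice-based cryptography'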
def open_spider(self, spider):
    # Open one DB session per crawl; Scrapy calls this when the spider starts.
    self.session = DBSession()
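# A sketch of how the rest of this Scrapy pipeline might look, assuming the
# item fields map one-to-one onto the Notification model. process_item and
# close_spider are the standard pipeline hooks, but these bodies are
# assumptions, not the repo's actual code.
def process_item(self, item, spider):
    self.session.add(Notification(**dict(item)))  # assumed 1:1 field mapping
    self.session.commit()
    return item

def close_spider(self, spider):
    self.session.close()  # release the session when the crawl ends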
def __init__(self, *a, **kw):
    super().__init__(*a, **kw)
    self.college = '清华大学交叉信息研究院'  # Tsinghua IIIS; stored with each scraped notice
    self.db = DBSession()
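# For orientation only: this __init__ belongs to a Scrapy spider subclass,
# roughly shaped like the sketch below. The name attribute and the parse
# stub are assumptions; only the __init__ body is from the repo.
import scrapy

class ThuIiisSpider(scrapy.Spider):
    name = 'thu_iiis'  # assumed spider name

    def __init__(self, *a, **kw):
        super().__init__(*a, **kw)
        self.college = '清华大学交叉信息研究院'
        self.db = DBSession()

    def parse(self, response):
        ...  # extract notice links/fields here (assumed)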
from db_model.seeds import Seed
from db_model.db_config import DBSession
from db_model.notifications import Notification
from UrlHandle import UrlHandle
from armus1.spiders.notice import NoticeSpider
from armus1.spiders.thu_iiis import ThuIiisSpider
# Scrapy API
from scrapy.utils.project import get_project_settings
from scrapy.crawler import CrawlerProcess

process = CrawlerProcess(get_project_settings())
db = DBSession()

# Example seed rows, kept commented out for reference:
#scut_se = Seed(start_url='http://www2.scut.edu.cn/sse/xshd/list.htm',
#               college='华南理工大学软件学院',
#               url_xpath='.//*[@class="news_ul"]//li',
#               nextpage_xpath='//*[@id="wp_paging_w67"]/ul/li[2]/a[3]',
#               title_word='举办,举行',
#               notice_time_xpath='//*[@id="page-content-wrapper"]/div[2]/div/div/div[2]/div/div/div/p/span[1]',
#               title='汇报主题:,报告题目:,题目:,Title:,报告主题:',
#               speaker='汇报人:,报告人:,Speaker',
#               venue='地点:,venue:,Address:',
#               time='Time:,时间:',
#               text_xpath='//*[@id="page-content-wrapper"]/div[2]/div/div/div[2]/div/div/div/div[2]/div/div//p')
#jnu_xx = Seed(start_url='https://xxxy2016.jnu.edu.cn/Category_37/Index.aspx',
#              college='暨南大学信息科学技术学院/网络空间安全学院',
#              url_xpath='//*[@id="mainContent"]/div[2]/ul//li',
#              nextpage_xpath='//*[@id="pe100_page_通用信息列表_普通式"]/div/a[9]',
#              title_word='学术讲座',
#              notice_time_xpath='//*[@id="mainContent"]/div[2]/div/div[1]/span[3]',
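# A minimal usage sketch, assuming the spiders take no required arguments:
# register each spider with the shared CrawlerProcess, then start the
# blocking Twisted reactor. crawl() and start() are the standard
# scrapy.crawler.CrawlerProcess API.
process.crawl(NoticeSpider)
process.crawl(ThuIiisSpider)
process.start()  # blocks until all registered crawls finish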