Example #1
0
 def orderbytime(self):
     """Select notifications from the past year whose titles match the
     security-related keywords, sort them newest-first by notify_time,
     and cache each row as a plain dict in ``self.info_bytime``.
     """
     self.session = DBSession()
     self.info_bytime = []
     # NOTE(review): the filter compares Notification.time while the ordering
     # uses Notification.notify_time; the earlier (commented-out) version did
     # the opposite. Confirm which column is the event time and which is the
     # publication time.
     keyword_filter = or_(
         Notification.title.like("%密码学%"),
         Notification.title.like("%信息安全%"),
         Notification.title.like("%security%"),
         Notification.title.like("%password%"),
     )
     one_year_ago = datetime.datetime.now() - timedelta(days=365)
     rows = (
         self.session.query(Notification)
         .filter(and_(Notification.time >= one_year_ago, keyword_filter))
         .order_by(desc(Notification.notify_time))
         .all()
     )
     print("按照报告举行时间由近及远排序:")
     for row in rows:
         record = row.__dict__
         # BUG FIX: the key was misspelled 'notiify_time', which raised
         # KeyError -- the mapped column is 'notify_time' (see order_by above).
         self.info_bytime.append({
             'title': record['title'],
             'speaker': record['speaker'],
             'time': record['time'],
             'venue': record['venue'],
             'college': record['college'],
             'url': record['url'],
             'notify_time': record['notify_time'],
         })
Example #2
0
 def __init__(self, seed, title_urls):
     """Set up a DB session and the keyword matchers for one seed site.

     seed       -- seed record describing the site being processed
     title_urls -- mapping of notice title -> notice URL
     """
     self.session = DBSession()
     self.key_word = KeyWords()  # per-field keyword lists used for matching
     self.seed = seed
     self.title_urls = title_urls
     self.urls = list(title_urls.values())
     # Field name -> comma-separated keyword string for that field.
     self.information = {
         field: getattr(self.key_word, field)
         for field in ('title', 'speaker', 'time', 'venue')
     }
Example #3
0
 def __init__(self):
     """Create the crawler process, open a DB session, and install the
     default field-matching keyword strings.
     """
     self.process = CrawlerProcess(get_project_settings())
     self.db = DBSession()
     self.init_seed_data()
     # Default comma-separated keyword lists used to locate each field in a
     # notice page (originally intended to be read from user input).
     self.title = '报告题目:,学术报告:,题目,报告主题:,Title'
     self.speaker = '报告人:,主讲人:,汇报人:,Speaker'
     self.venue = '地点:,Address,Venue,Place'
     self.time = '日期:,时间:,Time'
Example #4
0
 def open_spider(self, spider):
     """Scrapy pipeline hook: open a database session when the spider starts.

     :param spider: the spider being opened (unused here).
     """
     self.session = DBSession()
Example #5
0
 def __init__(self, *a, **kw):
     """Initialise the spider, opening a DB session and recording the
     college label attached to every scraped notification.
     """
     super().__init__(*a, **kw)
     self.db = DBSession()
     # Human-readable source name (Tsinghua IIIS).
     self.college = '清华大学交叉信息研究院'
Example #6
0
# Third-party: Scrapy API.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

# Project-local modules.
from armus1.spiders.notice import NoticeSpider
from armus1.spiders.thu_iiis import ThuIiisSpider
from db_model.db_config import DBSession
from db_model.notifications import Notification
from db_model.seeds import Seed
from UrlHandle import UrlHandle

# BUG FIX: removed `from db_model.db_config import Seed` and
# `from db_model.db_config import Notification` -- they re-imported names
# already bound from db_model.seeds / db_model.notifications and shadowed
# those bindings (or raised ImportError if db_config does not define them).

# Crawler process configured from the Scrapy project settings.
process = CrawlerProcess(get_project_settings())

# Shared database session for the seed inserts below.
db = DBSession()
#scut_se=Seed(start_url='http://www2.scut.edu.cn/sse/xshd/list.htm',college='华南理工大学软件学院',
#url_xpath='.//*[@class="news_ul"]//li',
#nextpage_xpath='//*[@id="wp_paging_w67"]/ul/li[2]/a[3]',
#title_word='举办,举行',
#notice_time_xpath='//*[@id="page-content-wrapper"]/div[2]/div/div/div[2]/div/div/div/p/span[1]',
#title='汇报主题:,报告题目:,题目:,Title:,报告主题:',speaker='汇报人:,报告人:,Speaker',
#venue='地点:,venue:,Address:',time='Time:,时间:',
#text_xpath='//*[@id="page-content-wrapper"]/div[2]/div/div/div[2]/div/div/div/div[2]/div/div//p')

#jnu_xx=Seed(start_url='https://xxxy2016.jnu.edu.cn/Category_37/Index.aspx',
#college='暨南大学信息科学技术学院/网络空间安全学院',
#url_xpath='//*[@id="mainContent"]/div[2]/ul//li',
#nextpage_xpath='//*[@id="pe100_page_通用信息列表_普通式"]/div/a[9]',
#title_word='学术讲座',
#notice_time_xpath='//*[@id="mainContent"]/div[2]/div/div[1]/span[3]',