Example #1
0
class ItemBankPipeLine(object):
    """Item pipeline that stores what the ``item_bank`` spider scraped.

    Items coming from any other spider are passed through unchanged.
    """

    # Created once at class-definition time and shared by all instances.
    session = DBSession()

    def process_item(self, item, spider):
        """Store the item's points and the item itself, then return the item.

        The ``'point'`` entry of the item is removed and iterated; each
        point yields one ``ChaperPoint`` and one ``ItemPoint`` that are
        handed to ``self.session.add``. The remaining fields are used to
        build an ``ItemBank``; an exception raised while adding it is
        printed together with the field dict instead of propagating.

        :param item: mapping of scraped fields; must contain ``'point'``
            when the spider is ``item_bank``.
        :param spider: the spider that produced ``item``.
        :return: the ``item`` that was passed in.
        """
        if spider.name != "item_bank":
            return item

        fields = dict(**item)
        points = fields.pop('point')

        for entry in points:
            self.session.add(ChaperPoint(
                chaper_id=entry.get('chaper_id'),
                title=entry.get('title'),
                code=entry.get('point_code'),
                url=entry.get('url'),
            ))
            self.session.add(ItemPoint(
                item_id=entry.get('item_id'),
                point_code=entry.get('point_code'),
            ))

        try:
            self.session.add(ItemBank(**fields))
        except Exception as err:
            # Best effort: report the failure and keep the crawl going.
            print(err, fields)
        return item
Example #2
0
class LibraryChapterPipeLine(object):
    """Item pipeline that stores items from the ``library_chapter`` spider."""

    # Created once at class-definition time and shared by all instances.
    session = DBSession()

    def process_item(self, item, spider):
        """Add a ``LibraryChapter`` built from the item, then return the item.

        Items produced by any other spider are returned untouched.

        :param item: mapping of scraped fields, expanded as keyword
            arguments to ``LibraryChapter``.
        :param spider: the spider that produced ``item``.
        :return: the ``item`` that was passed in.
        """
        if spider.name != "library_chapter":
            return item

        chapter = LibraryChapter(**item)
        self.session.add(chapter)
        return item
Example #3
0
def get_item_bank_init_url():
    """
    Get the list of item-bank URLs used for crawling data.

    NOTE(review): the body visible here only performs setup (imports, an
    empty dict and a DBSession); nothing is queried or returned yet --
    confirm whether the rest of the function is missing.
    :return:
    """
    # Imported locally rather than at module level.
    from jyeoo.mysql_model import DBSession, LibraryChapter, LibraryEntry, ItemStyle, ItemBankInit
    # re_list = list()

    # Presumably the result container (compare get_chapter_url) -- TODO confirm.
    re_dict = dict()
    session = DBSession()
Example #4
0
class ChapterPointPipeLine(object):
    """Item pipeline that saves content scraped by the ``chapter_point`` spider."""

    # Created once at class-definition time and shared by all instances.
    session = DBSession()

    def process_item(self, item, spider):
        """Write the item's ``content`` onto the matching ``ChaperPoint`` row.

        The row is looked up by ``item['id']`` with ``.one()``, so the
        lookup raises unless exactly one row matches. The change is
        committed immediately. Items from other spiders are returned
        untouched.

        :param item: mapping providing ``'id'`` and ``'content'``.
        :param spider: the spider that produced ``item``.
        :return: the ``item`` that was passed in.
        """
        if spider.name != "chapter_point":
            return item

        point_row = (
            self.session.session
            .query(ChaperPoint)
            .filter(ChaperPoint.id == item.get('id'))
            .one()
        )
        point_row.content = item.get('content')
        self.session.session.commit()
        return item
Example #5
0
def get_item_bank_url():
    """Yield the detail-page URL of every unfinished ``ItemBankInit`` row.

    A row is flagged ``is_finish = 1`` (and committed) only once the
    consumer asks for the *next* URL, i.e. after it has taken the row's
    own URL. When the query is exhausted the last yielded row is flagged
    the same way; previously it was left at ``is_finish == 0`` forever
    and was picked up again on every later run.

    If the consumer stops iterating early, the row whose URL was yielded
    last is left unflagged, exactly as before.

    :return: generator of ``detail_page_url`` values.
    """
    from jyeoo.mysql_model import DBSession, ItemBankInit

    db = DBSession()
    pending = db.session.query(ItemBankInit).filter(ItemBankInit.is_finish == 0)

    def _mark_finished(row):
        # Persist the flag right away so an interrupted crawl resumes here.
        row.is_finish = 1
        db.session.commit()

    previous = None
    for row in pending:
        if previous is not None:
            _mark_finished(previous)
        previous = row
        yield row.detail_page_url

    # Reached only when the consumer requested another URL after the final
    # one, the same condition under which every earlier row was flagged.
    if previous is not None:
        _mark_finished(previous)
Example #6
0
def get_chapter_url():
    """Build the question-search URL for every ``LibraryEntry`` row.

    The ``{subject}`` path segment is ``subject_code`` followed by
    ``level_code`` when ``int(level_code)`` is greater than 1, otherwise
    ``subject_code`` alone.

    :return: dict mapping each entry's ``id`` to its search URL.
    """
    from jyeoo.mysql_model import DBSession, LibraryEntry

    url_template = 'http://www.jyeoo.com/{subject}/ques/search?f=0&q={id}'
    db = DBSession()
    entries = db.session.query(LibraryEntry).all()

    return {
        entry.id: url_template.format(
            subject=(
                entry.subject_code + entry.level_code
                if int(entry.level_code) > 1
                else entry.subject_code
            ),
            id=entry.id,
        )
        for entry in entries
    }
Example #7
0
class ItemBankInitPipeLine(object):
    """Item pipeline that stores items from the ``item_bank_init`` spider."""

    # Created once at class-definition time and shared by all instances.
    session = DBSession()

    def process_item(self, item, spider):
        """Add an ``ItemBankInit`` for the item unless one already exists.

        Existence is decided by counting rows with the same
        ``fieldset_id``. An exception raised while adding is printed
        rather than propagated. Items from other spiders are returned
        untouched.

        :param item: mapping of scraped fields; ``'fieldset_id'`` is the
            de-duplication key.
        :param spider: the spider that produced ``item``.
        :return: the ``item`` that was passed in.
        """
        if spider.name != "item_bank_init":
            return item

        wanted_id = item.get('fieldset_id')
        already_stored = (
            self.session.session
            .query(ItemBankInit)
            .filter(ItemBankInit.fieldset_id == wanted_id)
            .count()
        )
        if already_stored != 0:
            return item

        try:
            self.session.add(ItemBankInit(**item))
        except Exception as err:
            # Best effort: report the failure and keep the crawl going.
            print(err)
        return item
Example #8
0
def get_chapter_point_url():
    """
    Get the list of knowledge-point URLs.

    Only ``ChaperPoint`` rows whose ``content`` is NULL are selected.
    :return: list of dicts, each with the row's ``'url'`` and ``'id'``.
    """
    from jyeoo.mysql_model import DBSession, ChaperPoint

    db = DBSession()
    without_content = db.session.query(ChaperPoint).filter(ChaperPoint.content.is_(None))
    return [{'url': row.url, 'id': row.id} for row in without_content]
Example #9
0
def get_valid_cookie(re_type=DICT):
    """
    Get a valid cookie.

    The cookie string is taken from the first ``CookieInfo`` row with
    ``is_valid == 1``; indexing that first row fails if there is none.
    The output format is chosen with ``isinstance`` checks on
    ``re_type``, so ``re_type`` acts as a sample value of the wanted
    type: a dict gives ``cookie_str_to_dict(...)``, a str gives the raw
    cookie string, a list gives ``cookie_str_to_list(...)``.
    :param re_type: value whose type selects the return format
    :return: the cookie in the selected format, or None when ``re_type``
        is not a dict, str or list
    """
    from jyeoo.mysql_model import DBSession, CookieInfo

    db = DBSession()
    valid_cookies = db.session.query(CookieInfo).filter(CookieInfo.is_valid == 1)
    raw_cookie = valid_cookies[0].cookie

    if isinstance(re_type, dict):
        result = cookie_str_to_dict(raw_cookie)
    elif isinstance(re_type, str):
        result = raw_cookie
    elif isinstance(re_type, list):
        result = cookie_str_to_list(raw_cookie)
    else:
        result = None
    return result