Esempio n. 1
0
def is_doc_exists(title):
    """Report whether a WikiPage row with the given title is stored.

    Args:
        title: Value compared for equality against ``WikiPage.title``.

    Returns:
        A ``(found, page_id)`` tuple: ``(True, <id of the first matching
        row>)`` when the query yields a row, otherwise ``(False, None)``.
    """
    lookup = db_session.query(WikiPage).filter(WikiPage.title == title)
    page = lookup.first()
    if page:
        return True, page.id
    return False, None
Esempio n. 2
0
def insert_to_wiki_page(title, state):
    """Create or update the WikiPage row for ``title`` and return its id.

    If a row with this title already exists, only its ``state`` is
    overwritten. Otherwise a new row is added to the session and flushed.
    Nothing is committed here.

    Args:
        title: Title used both for the lookup and for the new row.
        state: Value stored in the row's ``state`` attribute.

    Returns:
        The ``id`` of the existing or newly added row.
    """
    existing = (
        db_session.query(WikiPage)
        .filter(WikiPage.title == title)
        .first()
    )
    if existing:
        existing.state = state
        return existing.id

    page = WikiPage(title=title, state=state)
    db_session.add(page)
    # Flush before reading ``id`` -- presumably so the database-assigned
    # key is populated on the new row; confirm against the model.
    db_session.flush()
    return page.id
Esempio n. 3
0
    def execute(self, context):
        """Run a single crawl pass over the pending wiki documents.

        When the WikiPage table is empty, the pass starts from one
        hard-coded seed document. Otherwise every row whose ``state``
        compares equal to ``False`` is turned into a ``title -> URL`` entry.
        Each entry is then handed to ``crawl_doc``, with a progress line
        printed every ten documents.

        Args:
            context: Not used by this method.
        """
        for depth in range(1):
            if not db_session.query(WikiPage).first():
                # Empty table: begin from the single seed document.
                docs = {'사과': 'https://namu.wiki/w/사과'}
            else:
                # ``== False`` builds the column comparison passed to
                # ``filter``; it must not be rewritten as ``not``/``is``.
                pending = db_session.query(WikiPage) \
                    .filter(WikiPage.state == False).all()
                titles = [as_dict(page).get('title') for page in pending]
                docs = {name: f'https://namu.wiki/w/{name}' for name in titles}

            total = len(docs)
            print(f'{depth}번째 depth docs 갯수: {total}\n')
            for index, (name, link) in enumerate(docs.items()):
                if index % 10 == 0:
                    print(f'{index}/{total}')
                crawl_doc(title=name, url=link)
Esempio n. 4
0
    def execute(self, context):
        """Crawl wiki documents depth by depth until none are pending.

        Each pass collects the titles to crawl: the hard-coded seed when the
        WikiPage table is empty, otherwise the titles of all rows whose
        ``state`` compares equal to ``False``. Every title is passed to
        ``crawl_doc`` together with a finder obtained from
        ``MongoConnector``. The loop stops once the table is non-empty and
        no row with ``state == False`` remains.

        Args:
            context: Not used by this method.

        NOTE(review): termination relies on ``crawl_doc`` (defined
        elsewhere) inserting rows and eventually leaving none with
        ``state == False`` -- confirm. If the table stays empty after the
        seed is crawled, this loop re-crawls the seed forever.
        """
        # mongo connector
        mc = MongoConnector()
        finder = mc.make_finder()

        # The depth counter uses its own name: the progress loop below used
        # to reuse ``i`` and overwrite it, so the printed depth was wrong
        # from the second pass onwards.
        depth = 0
        while True:
            depth += 1
            if not db_session.query(WikiPage).first():
                # Empty table: begin from the single seed title.
                docs = ['사과']
            else:
                # ``== False`` builds the column comparison passed to
                # ``filter``; it must not be rewritten as ``not``/``is``.
                pending = db_session.query(WikiPage) \
                    .filter(WikiPage.state == False).all()
                if not pending:
                    break
                docs = [as_dict(page).get('title') for page in pending]

            total = len(docs)
            print(f'{depth}번째 depth docs 갯수: {total}\n')
            for index, key in enumerate(docs):
                if index % 10 == 0:
                    print(f'{index}/{total}')
                crawl_doc(key=key, finder=finder)
Esempio n. 5
0
def insert_to_wiki_page_relation(parent_id, child_id, updated_at):
    """Create or refresh the WikiPageRelation row for a parent/child pair.

    If a row with the same ``parent_id`` and ``child_id`` already exists,
    only its ``updated_at`` is overwritten. Otherwise a new row is added to
    the session. Nothing is flushed or committed here.

    Args:
        parent_id: Value matched against / stored in ``parent_id``.
        child_id: Value matched against / stored in ``child_id``.
        updated_at: Value stored in the row's ``updated_at`` attribute.

    Returns:
        None.
    """
    lookup = db_session.query(WikiPageRelation)
    lookup = lookup.filter(WikiPageRelation.parent_id == parent_id)
    lookup = lookup.filter(WikiPageRelation.child_id == child_id)
    relation = lookup.first()

    if relation:
        relation.updated_at = updated_at
        return

    db_session.add(
        WikiPageRelation(
            parent_id=parent_id,
            child_id=child_id,
            updated_at=updated_at,
        )
    )