Code example #1
async def main():
    conf = Config()

    logging.basicConfig(level=logging.DEBUG)
    logging.config.dictConfig(conf.DEFAULT_LOGGING)
    logger = logging.getLogger(__name__)

    db = ExtendedDBManager(init_db(conf))
    db.database.create_tables([Article], safe=True)

    executor = ThreadPoolExecutor(max_workers=10)
    loop = asyncio.get_running_loop()
    loop.set_default_executor(executor)

    DATA_FOR_MATPLOTLIB = {}

    await truncate(db=db)
    await vacuum(db=db)
    await drop_index(db=db)

    for mode in ["noindex", 'index']:
        await truncate(db=db)
        await vacuum(db=db)
        if mode == 'index':
            await create_index(db=db)
        else:
            await drop_index(db=db)

        for i in range(1, 81):
            await buck_create_new(db=db, epoch_count=i, count=10**6, mode=mode)
            row1 = await db.get(Article.select().limit(1))
            row2 = await db.get(Article.select().order_by(
                Article.created_date.desc()).limit(1))

            if mode == 'noindex':
                arv_time__noindex1 = await call_avr_time(db=db, text=row1.name)
                arv_time__noindex2 = await call_avr_time(db=db, text=row2.name)
                arv_time__noindex = max(arv_time__noindex1, arv_time__noindex2)

                logger.info(f"Time NoIndex={arv_time__noindex}")
                DATA_FOR_MATPLOTLIB[str(i)] = {"noindex": arv_time__noindex}
            else:
                arv_time__index1 = await call_avr_time(db=db, text=row1.name)
                arv_time__index2 = await call_avr_time(db=db, text=row2.name)
                arv_time__index = max(arv_time__index1, arv_time__index2)

                logger.info(f"Time Index={arv_time__index}")
                DATA_FOR_MATPLOTLIB[str(i)].update({"index": arv_time__index})

            logger.info("")
            now_count = await db.count(Article.select())
            logger.info(f"Row in db count = {now_count}")
            logger.info("==  ==  " * 15)
            logger.info("==  ==  " * 15)

    FileReader.write_data(DATA_FOR_MATPLOTLIB)
    logger.info("Exit")
Code example #2
 def create_article(self, article: schemas.ArticleCreate) -> Any:
     """ Create New Article """
     try:
         db_article = Article(user_id=article.user_id,
                              article_title=article.article_title,
                              article_text=article.article_text,
                              tags=article.tags)
         db_article.save()
         return db_article
     except Exception as e:
         fastapi_logger.exception("create_article")
         return None
Code example #3
File: test.py Project: BorisovDima/_ex
def test_db(client):
    mixin = DatabaseMixin()
    mixin.db = client.db
    mixin.Table = Article

    def check_article(first, second):
        assert first.title == second.title
        assert first.subtitle == second.subtitle
        assert first.article == second.article
        assert first.date == second.date
        assert first.image == second.image
        assert first.id == second.id

    with mixin.db.init_db():
        with client.app.test_request_context():
            articles = {}

            # create
            like_article = namedtuple(
                'article',
                ['title', 'subtitle', 'article', 'date', 'image', 'id'])

            for i in range(1, 11):
                a = mixin.create(f'title{i}', f'subtitle{i}', f'article{i}',
                                 f'date{i}', f'image{i}')
                a_ = like_article(f'title{i}', f'subtitle{i}', f'article{i}',
                                  f'date{i}', f'image{i}', i)
                check_article(a, a_)
                articles[i] = a

            # get_many
            articles_ = mixin.get_many()
            for a_ in articles_:
                a = articles[a_.id]
                check_article(a, a_)

            # get_id
            a = articles[random.randrange(1, 11)]
            a_ = mixin.get_id(a.id)
            check_article(a, a_)

            # delete_many
            articles_ = mixin.get_many()
            assert len(articles_) == 10
            mixin.delete_many()
            articles_ = mixin.get_many()
            assert len(articles_) == 0

            # Article
            obj = Article('title', 'subtitle', 'article' * 100, 'date',
                          'image', 1)
            assert len(obj.get_text(300)) + len(obj.title) == 300
Code example #4
async def create_index(db, index="article_name"):
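    # Create a single-column index on article.name via raw SQL; a
    # ProgrammingError (for example, the index already exists) is swallowed
    # and signalled by returning True, while success returns False.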
    try:
        await db.execute(
            Article.raw(sql=f"CREATE INDEX {index} ON article (name);"))
    except psycopg2.ProgrammingError as e:
        return True
    return False
Code example #5
async def truncate(db):
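    # Empty the article table via raw SQL; a ProgrammingError is swallowed
    # and signalled by returning True.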
    try:
        await db.execute(Article.raw(sql="TRUNCATE TABLE article;"))
        logger.info("TRUNCATE done!")
    except psycopg2.ProgrammingError as e:
        return True
    return False
Code example #6
File: dbPopulator.py Project: thomas-j-sell/backend
def populate():
    startIndex = 0
    count = 20

    while (startIndex <= 300):
        r = requests.get('http://ign-apis.herokuapp.com/articles?startIndex=' +
                         str(startIndex) + '&count=' + str(count))
        j = r.json()

        for item in j['data']:
            headline = item['metadata']['headline']
            subHeadline = item['metadata']['subHeadline']
            link = "ign.com/articles/" + item['metadata']['slug']
            a = Article(headline=headline, subHeadline=subHeadline, link=link)
            saveArticle(a)

        startIndex += count

    startIndex = 0
    count = 20

    while (startIndex <= 300):
        r = requests.get('http://ign-apis.herokuapp.com/videos?startIndex=' +
                         str(startIndex) + '&count=' + str(count))
        j = r.json()

        for item in j['data']:
            name = item['metadata']['name']
            description = item['metadata']['description']
            link = item['metadata']['url']
            v = Video(name=name, description=description, link=link)
            saveVideo(v)

        startIndex += count
Code example #7
async def atom_task(db, data):
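    # Bulk-insert the prepared rows (status, name, body, created_date) into
    # the article table.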
    query = Article.insert_many(data,
                                fields=[
                                    Article.status, Article.name, Article.body,
                                    Article.created_date
                                ])
    await db.execute(query)
    return True
Code example #8
 def get_article(self, article_id: str):
     """ Get A Single article """
     try:
         data = Article.objects(id=article_id).first()
         return data
     except Exception as e:
         fastapi_logger.exception("get_article")
         return None
Code example #9
 def delete_article(self, article_id: str) -> Any:
     """ Delete Article """
     try:
         db_article = Article.objects(id=article_id)
         db_article.delete()
         return True
     except Exception as e:
         fastapi_logger.exception("delete_article")
         return None
Code example #10
async def vacuum(db):
    try:
        logger.info("Prepare to VACUUM")
        await db.execute(
            Article.raw(sql="VACUUM(FULL, VERBOSE, ANALYZE) article;"))
        logger.info("VACUUM done!")
    except psycopg2.ProgrammingError as e:
        return True
    return False
Code example #11
File: views.py Project: shimachao/markdown_blog
def article(article_id):
    # Return the requested article
    d = Article.select().where(Article.id == article_id)
    print(d)
    if not d:
        abort(404)
    d = d.get()
    path = '..\\' + d.path
    with open(file=path, mode='r', encoding='utf-8') as file:
        text = md(file.read())
    return render_template('article.html', title=d.title, text=text)
Code example #12
File: crawl_page.py Project: anch0vy/dcCrawl
 def add2db(self, id, writer, ip, title, content, time_):
     '''Insert the article into the DB.
     The caller must check with IsArticleExist whether it already exists before calling add2db.
     '''
     timestamp = time.mktime(datetime.datetime.strptime(time_, "%Y-%m-%d %H:%M:%S").timetuple())
     article = Article()
     article.id = id
     article.content = content
     article.writer = writer
     article.ip = ip
     article.title = title
     article.timestamp = timestamp
     article.category = self.categoryId
     article.comment = 0
     article.isDelete = False
     self.s_db.add(article)
     self.s_db.commit()
     if self.debug:
         try:
             print '[add]', id, ':', int(time.time()) - int(timestamp), title
         except:
             print '[error]debug print error'
     return
Code example #13
    def update_article(self, article_id: str,
                       article: schemas.ArticleCreate) -> Any:
        """ Update Article """
        try:
            db_article = Article.objects(id=article_id).first()

            db_article.article_title = article.article_title
            db_article.article_text = article.article_text
            db_article.tags.extend(article.tags)
            db_article.modified_timestamp = datetime.utcnow()

            db_article.save()
            return db_article
        except Exception as e:
            fastapi_logger.exception("update_article")
            return None
Code example #14
File: app.py Project: TKais/CompSci-Catalog
def create_article(topic_url, category_url):
    category = session.query(Category).filter_by(url=category_url).one()
    if 'username' not in login_session:
        return redirect('/login')
    if request.method == 'POST':
        user_id = login_session['user_id']
        new_article = Article(name=request.form['aname'],
                              content=request.form['acontent'],
                              category_id=category.id,
                              user_id=user_id)
        session.add(new_article)
        session.commit()
        return redirect(
            url_for('show_category',
                    topic_url=topic_url,
                    category_url=category_url))
    else:
        return render_template('new_article.html', category=category)
Code example #15
def map_article(article):
    source = map_source(article['source'])
    return Article(
        id=article['id'],
        author=map_author(article['author'], source),
        title=article['title'],
        perex=article['perex'],
        body=article['body'],
        raw_body=article['raw_body'],
        published_at=article['published_at'],
        extracted_at=article['extracted_at'],
        url=article['url'],
        source_id=source.id,
        media=[map_media(article['id'], m) for m in article['media']],
        category=article['category'],
        other_info=article['other_info'],
        veracity=article['veracity'],
        monitor_id=article['monitor_id'],
        monitor_name=article['monitor_name'])
Code example #16
async def call_avr_time(db: ExtendedDBManager, text, n=20):
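    # Run the same EXPLAIN ANALYSE query n times concurrently, parse the
    # reported "Execution Time" values, and return their mean after dropping
    # results more than 30% away from the raw average (falling back to the
    # raw average if everything is filtered out).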
    all_tasks = [
        db.execute(
            Article.raw(
                sql=f"EXPLAIN ANALYSE SELECT * FROM article where name = '{text}';"
            )) for _ in range(n)
    ]
    res = await asyncio.gather(*all_tasks)
    time = [
        float(r[0].replace("Execution Time: ", '').replace(" ms", ''))
        for row in list(res) for r in row._rows
        if r[0].startswith("Execution Time:")
    ]
    avr = mean(time)
    avr_g = 1.3 * avr
    avr_l = 0.7 * avr
    new_time = [t for t in time if avr_l <= t <= avr_g]
    return mean(new_time) if new_time else avr
Code example #17
async def index(request):
    art_count = 30
    page = int(request.rel_url.query.get('page', 0))
    if page == 1:
        raise web.HTTPFound('/')
    art_sql = Article.select().limit(art_count).offset(page * art_count)
    articles = await go(request, art_sql)

    co_sql = select([func.count(Article.c.id)])
    count = await go(request, co_sql)
    count = count[0] // art_count + 1
    pages = [x for x in range(1, count)]
    context = {
        'h1': 'Спаршенный Блог о SEO',
        'description': DESCRIPTION,
        'keywords': KEYWORDS,
        'articles': articles,
        'pages': pages
    }
    response = aiohttp_jinja2.render_template('index.html', request, context)
    return response
Code example #18
    def __extract_to_sql(self):
        """
        Creates article table if not exists
        If url already exists in database, it will check if html content (raw_content) has changed
        Otherwise it will create new article

        Database sets for SQLite3.
        #TODO: hardcoded to SQLite3, get parameter from user
        """

        # Bad practice for importing
        # But it's creating tables on import
        # TODO: create table when __extract_to_sql() function called
        from db import sql_session as sql

        is_exists = sql.query(
            exists().where(Article.url == self.article.url)).scalar()
        if is_exists:
            # TODO: redundant query count. is_exists should be combined with article variable. affects database performance.
            article = sql.query(Article).filter_by(
                url=self.article.url).first()
            if article.raw_content != self.article.raw_content:
                article.raw_content = self.article.raw_content
                article.content = self.article.content
                article.title = self.article.title
                article.meta_keywords = self.article.meta_keywords
                article.meta_description = self.article.meta_description
                article.images = json.dumps(self.article.images)
                sql.commit()
        else:
            article = Article(title=self.article.title,
                              content=self.article.content,
                              url=self.article.url,
                              raw_content=self.article.raw_content,
                              meta_description=self.article.meta_description,
                              meta_keywords=self.article.meta_keywords,
                              images=json.dumps(self.article.images))
            sql.add(article)
            sql.commit()
Code example #19
async def drop_index(db, index="article_name"):
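    # Drop the index if it exists; a ProgrammingError is swallowed and
    # signalled by returning True.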
    try:
        await db.execute(Article.raw(sql=f"DROP INDEX IF EXISTS {index};"))
    except psycopg2.ProgrammingError as e:
        return True
    return False
Code example #20
    def _process_record(self, item_arg):
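        # Process one crawled record: extract plain text with BoilerPipe,
        # detect language and dates, run keyword extraction and sentiment
        # classification, and persist the results through self._session.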

        crawl_id, record = item_arg
        headers, content, url, date_crawled, content_type = record

        assert headers is not None
        assert content is not None 
        assert url is not None 
        assert date_crawled is not None 
        assert content_type is not None 

        status = "Processed"

        # Fix for a seg-fault
        if "nasa.gov" in url:
            return False

        # Sort out the domain
        domain_identifier = None 
        logging.info("Retrieving domain...")
        domain_key = self.dc.get_Domain_key(url)
        while domain_identifier is None:
            domain_identifier = self.drw.get_domain(domain_key)

        domain = self._session.query(Domain).get(domain_identifier)
        assert domain is not None

        # Build database objects 
        path   = self.ac.get_path_fromurl(url)
        article = Article(path, date_crawled, crawl_id, domain, status)
        self._session.add(article)
        classified_by = self.swc.get_SoftwareVersion_fromstr(pysen.__VERSION__)
        assert classified_by is not None

        if content_type != 'text/html':
            logging.error("Unsupported content type: %s", str(content_type))
            article.status = "UnsupportedType"
            return False

        # Start the async transaction to get the plain text
        worker_req_thread = BoilerPipeWorker(content)
        worker_req_thread.start()

        # Whilst that's executing, parse the document 
        logging.info("Parsing HTML...")
        html = BeautifulSoup(content)

        if html is None or html.body is None:
            article.status = "NoContent"
            return False

        # Extract the dates 
        date_dict = pydate.get_dates(html)

        if len(date_dict) == 0:
            status = "NoDates"

        # Detect the language
        lang, lang_certainty = langid.classify(content)

        # Wait for the BoilerPipe thread to complete
        worker_req_thread.join()
        logging.debug(worker_req_thread.result)
        logging.debug(worker_req_thread.version)

        if worker_req_thread.result is None:
            article.status = "NoContent"
            return False

        # If the language isn't English, skip it
        if lang != "en":
            logging.info("language: %s with certainty %.2f - skipping...", lang, lang_certainty)
            article.status = "LanguageError" # Replace with something appropriate
            return False

        content = worker_req_thread.result.encode('ascii', 'ignore')

        # Headline extraction 
        h_counter = 6
        headline = None
        while h_counter > 0:
            tag = "h%d" % (h_counter,)
            found = False 
            for node in html.findAll(tag):
                if node.text in content:
                    headline = node.text 
                    found = True 
                    break 
            if found:
                break
            h_counter -= 1

        # Run keyword extraction 
        keywords = self.ex(content)
        kset     = KeywordSet(self.stop_list)
        nnp_sets_scored = set([])

        for word, freq, amnt in sorted(keywords):
            try:
                nnp_sets_scored.add((word, freq))
            except ValueError:
                break 

        nnp_adj = set([])
        nnp_set = set([])
        nnp_vector = []
        for sentence in sent_tokenize(content):
            text = nltk.word_tokenize(sentence)
            pos  = nltk.pos_tag(text)
            pos_groups = itertools.groupby(pos, lambda x: x[1])
            for k, g in pos_groups:
                if k != 'NNP':
                    continue
                nnp_list = [word for word, speech in g]
                nnp_buf = []
                for item in nnp_list:
                    nnp_set.add(item)
                    nnp_buf.append(item)
                    nnp_vector.append(item)
                for i, j in zip(nnp_buf[0:-1], nnp_buf[1:]):
                    nnp_adj.add((i, j))

        nnp_vector = filter(lambda x: x.lower() not in self.stop_list, nnp_vector)
        nnp_counter = Counter(nnp_vector)
        for word in nnp_set:
            score = nnp_counter[word]
            nnp_sets_scored.add((word, score))

        for item, score in sorted(nnp_sets_scored, key=lambda x: x[1], reverse=True):
            try: 
                if type(item) == types.ListType or type(item) == types.TupleType:
                    kset.add(' '.join(item))
                else:
                    kset.add(item)
            except ValueError:
                break 

        scored_nnp_adj = []
        for item1, item2 in nnp_adj:
            score = nnp_counter[item1] + nnp_counter[item2]
            scored_nnp_adj.append((item1, item2, score))

        nnp_adj = []
        for item1, item2, score in sorted(scored_nnp_adj, key=lambda x: x[1], reverse=True):
            if len(nnp_adj) < KEYWORD_LIMIT:
                nnp_adj.append((item1, item2))
            else:
                break

        # Generate list of all keywords
        keywords = set([])
        for keyword in kset:
            try:
                k = Keyword(keyword)
                keywords.add(k)
            except ValueError as ex:
                logging.error(ex)
                continue
        for item1, item2 in nnp_adj:
            try:
                k = Keyword(item1)
                keywords.add(k)
            except ValueError as ex:
                logging.error(ex)
            try:
                k = Keyword(item2)
                keywords.add(k)
            except ValueError as ex:
                logging.error(ex)

        # Resolve keyword identifiers
        keyword_resolution_worker = KeywordResolutionWorker(set([k.word for k in keywords]), self.redis_kw)
        keyword_resolution_worker.start()
            
        # Run sentiment analysis
        trace = []
        features = self.cls.classify(worker_req_thread.result, trace) 
        label, length, classified, pos_sentences, neg_sentences,\
        pos_phrases, neg_phrases  = features[0:7]        

        # Convert Pysen's model into database models
        try:
            doc = Document(article.id, label, length, pos_sentences, neg_sentences, pos_phrases, neg_phrases, headline)
        except ValueError as ex:
            logging.error(ex)
            logging.error("Skipping this document...")
            article.status = "ClassificationError"
            return False

        self._session.add(doc)
        extracted_phrases = set([])
        for sentence, score, phrase_trace in trace:
            sentence_type = "Unknown"
            for node in html.findAll(text=True):
                if sentence.text in node.strip():
                    sentence_type = node.parent.name.upper()
                    break

            if sentence_type not in ["H1", "H2", "H3", "H4", "H5", "H6", "P", "Unknown"]:
                sentence_type = "Other"

            label, average, prob, pos, neg, probs, _scores = score 

            s = Sentence(doc, label, average, prob, sentence_type)
            self._session.add(s)
            for phrase, prob, score, label in phrase_trace:
                p = Phrase(s, score, prob, label)
                self._session.add(p)
                extracted_phrases.add((phrase, p))

        # Wait for keyword resolution to finish
        keyword_resolution_worker.join()
        keyword_mapping = keyword_resolution_worker.out_keywords

        # Associate extracted keywords with phrases
        keyword_objects, short_keywords = kset.convert(keyword_mapping, self.kwc)
        for k in keyword_objects:
            self._session.merge(k)
        for p, p_obj in extracted_phrases:
            for k in keyword_objects:
                if k.word in p.get_text():
                    nk = KeywordIncidence(k, p_obj)

        # Save the keyword adjacency list
        for i, j in kset.convert_adj_tuples(nnp_adj, keyword_mapping, self.kwc):
            self._session.merge(i)
            self._session.merge(j)
            kwa = KeywordAdjacency(i, j, doc)
            self._session.add(kwa)

        # Build date objects
        for key in date_dict:
            rec  = date_dict[key]
            if "dates" not in rec:
                logging.error("OK: 'dates' is not in a pydate result record.")
                continue
            dlen = len(rec["dates"])
            if rec["text"] not in content:
                logging.debug("'%s' is not in %s", rec["text"], content)
                continue
            if dlen > 1:
                for date, day_first, year_first in rec["dates"]:
                    try:
                        dobj = AmbiguousDate(date, doc, day_first, year_first, rec["prep"], key)
                    except ValueError as ex:
                        logging.error(ex)
                        continue
                    self._session.add(dobj)
            elif dlen == 1:
                for date, day_first, year_first in rec["dates"]:
                    dobj = CertainDate(date, doc, key)
                    self._session.add(dobj)
            else:
                logging.error("'dates' in a pydate result set contains no records.")

        # Process links
        for link in html.findAll('a'):
            if not link.has_attr("href"):
                logging.debug("skipping %s: no href", link)
                continue

            process = True 
            for node in link.findAll(text=True):
                if node not in worker_req_thread.result:
                    process = False 
                    break 
            
            if not process:
                logging.debug("skipping %s because it's not in the body text", link)
                break

            href, junk, junk = link["href"].partition("#")
            if "http://" in href:
                try:

                    domain_id = None 
                    domain_key = self.dc.get_Domain_key(href)
                    while domain_id is None:
                        domain_id = self.drw.get_domain(domain_key)

                    assert domain_id is not None
                    href_domain = self._session.query(Domain).get(domain_id)
                except ValueError as ex:
                    logging.error(ex)
                    logging.error("Skipping this link")
                    continue
                href_path   = self.ac.get_path_fromurl(href)
                lnk = AbsoluteLink(doc, href_domain, href_path)
                self._session.add(lnk)
                logging.debug("Adding: %s", lnk)
            else:
                href_path  = href 
                try:
                    lnk = RelativeLink(doc, href_path)
                except ValueError as ex:
                    logging.error(ex)
                    logging.error("Skipping link")
                    continue
                self._session.add(lnk)
                logging.debug("Adding: %s", lnk)

        # Construct software involvment records
        self_sir = SoftwareInvolvementRecord(self.swc.get_SoftwareVersion_fromstr(self.__VERSION__), "Processed", doc)
        date_sir = SoftwareInvolvementRecord(self.swc.get_SoftwareVersion_fromstr(pydate.__VERSION__), "Dated", doc)
        clas_sir = SoftwareInvolvementRecord(self.swc.get_SoftwareVersion_fromstr(pysen.__VERSION__), "Classified", doc)
        extr_sir = SoftwareInvolvementRecord(self.swc.get_SoftwareVersion_fromstr(worker_req_thread.version), "Extracted", doc)

        for sw in [self_sir, date_sir, clas_sir, extr_sir]:
            self._session.merge(sw, load=True)

        logging.debug("Domain: %s", domain)
        logging.debug("Path: %s", path)
        article.status = status

        # Commit to database, return True on success
        try:
            self._session.commit()
        except OperationalError as ex:
            logging.error(ex)
            self._session.rollback()
            return None

        return article.id
Code example #21
        driver.get(url)
        time.sleep(3)
        accept_cookies()
        time.sleep(3)

        article = session.query(Article).filter(
            Article.article_url == url).first()
        if article is None:
            article_title = driver.find_element_by_css_selector(
                "h1.article-title").text
            article_publication_date = datetime.datetime.strptime(
                driver.find_element_by_css_selector(
                    "p.article-pubdate").text.strip(),
                "%d. %B %Y, %H:%M",
            )
            article = Article(article_title, url, article_publication_date)
            session.add(article)
            session.commit()

        page_count = 1
        if args.continue_article:
            if article.article_id != args.continue_article:
                logger.debug(
                    f"Skipping article {article} with id {article.article_id}."
                )
                continue
            last_posting_ref_id = get_last_crawled_posting_id_for_article(
                article.article_id)
            if last_posting_ref_id:
                page_count = go_to_page_with_posting_id(
                    last_posting_ref_id, page_count)
Code example #22
async def worker(qu, coro_num, session, engine):
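    # Pull URLs from the queue, fetch each page through a random proxy,
    # translate the post with translate_text, and store it via the Article
    # table; requests that fail with connection errors are re-queued.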
    loop = asyncio.get_running_loop()
    while True:
        if qu.qsize() == 0:
            break

        url = await qu.get()
        try:

            prox = random.choice(proxies_list)
            proxies = {'http': prox, 'https': prox}
            headers = {'User-Agent': random.choice(user_agents)}

            print(f'[Send request in {coro_num}] [queue_size {qu.qsize()}]',
                  url)
            response = await session.get(url, headers=headers, timeout=10)

            if '/category/' in url:
                post_urls = response.html.xpath('//h3/a/@href')
                for u in post_urls:
                    if u.endswith('.html'):
                        if u not in articles:
                            await qu.put(u)
                            articles.add(u)
                continue

            post = {}
            name = response.html.xpath('//h1/text()')[0]
            post['name'] = await loop.run_in_executor(None, translate_text,
                                                      name, 'ru', 'uk')
            post['slug'] = slugify(post['name'])
            post['source'] = url
            post['category'] = response.html.xpath(
                '//ul[@class="td-category"]/li/a/text()')
            post['category'] = ','.join(post['category'])
            post['image'] = response.html.xpath(
                '//div[@class="td-post-featured-image"]//img/@src')[0]
            elements = response.html.xpath('//p')
            post['content'] = ''
            post['parsed_time'] = datetime.now().date()
            for elem in elements:
                translated = await loop.run_in_executor(
                    None, translate_text, elem.text, 'ru', 'uk')
                post['content'] += f'<p>{translated}</p>\n'
                del translated

            async with engine.acquire() as cursor:
                sql = Article.insert().values(**post)
                await cursor.execute(sql)

            print('[Article saved]', post["name"])

            del url, prox, proxies, headers, response, post, sql

        except (ConnectionError, ReadTimeout):
            await qu.put(url)

        except KeyboardInterrupt:
            quit()

        except Exception as e:
            print(e, type(e), sys.exc_info()[2].tb_lineno)
Code example #23
File: db_data.py Project: TKais/CompSci-Catalog
           url='Artificial-Intelligence',
           image='AI.jpeg')
session.add(AI)
session.commit()

supervised_learning = Category(name='Supervised Learning',
                               url='Supervised-Learning',
                               topic_id=1,
                               image='supervised.png')
session.add(supervised_learning)
session.commit()

neural_networks = Article(name='Neural Networks',
                          category_id=1,
                          content='Artificial neural networks (ANN) or ' +
                          'connectionist systems are computing ' +
                          'systems vaguely inspired by the biological ' +
                          'neural networks that constitute ' +
                          ' animal brains.')
session.add(neural_networks)
session.commit()

unsupervised_learning = Category(name='Unsupervised Learning',
                                 url='Unupervised-Learning',
                                 topic_id=1,
                                 image='unsupervised.png')
session.add(unsupervised_learning)
session.commit()

human_computer_interaction = Topic(name='Human Computer Interaction',
                                   url='Human-Computer-Interaction',