Example #1
0
#!/usr/bin/env python
# encoding=utf-8
"""
数据库查询接口
"""
import sys
import os
from scpy.logger import get_logger

# Module-level logger shared by the query helpers in this file.
LOGGER = get_logger()
# Project root = the parent of this file's directory; appended to sys.path
# so sibling top-level packages (e.g. ``config`` imported below) resolve
# when this module is run from inside the package directory.
ROOT_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.path.pardir))
if ROOT_PATH not in sys.path:
    sys.path.append(ROOT_PATH)

from psycopg2 import pool
from psycopg2.extras import RealDictCursor
from config import PG_HOST, PG_PORT, SC_CRAWLER_PG_QUERY

# Shared, thread-safe PostgreSQL connection pool (1..20 connections) for the
# crawler database. Host/port come from config; the remaining connection
# keyword args (presumably dbname/user/password — defined in config, not
# visible here) are expanded from SC_CRAWLER_PG_QUERY.
sc_crawler_pg_cnn_pool = pool.ThreadedConnectionPool(minconn=1,
                                                     maxconn=20,
                                                     host=PG_HOST,
                                                     port=PG_PORT,
                                                     **SC_CRAWLER_PG_QUERY)


def sc_crawler_insert_one(pg_sql, data=None):
    conne = sc_crawler_pg_cnn_pool.getconn()
    try:
        cursor = conne.cursor()
        if data:
Example #2
0
from scpy.date_extractor import extract_first_date

# S3 client/resource pair authenticated with the module-level AWS credentials.
client = boto3.client('s3',
                      aws_access_key_id=AWS_ACCESS_KEY_ID,
                      aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                      region_name=AWS_REGION_NAME)
s3_resource = boto3.resource('s3',
                             aws_access_key_id=AWS_ACCESS_KEY_ID,
                             aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                             region_name=AWS_REGION_NAME)
# Alternation of every known region code, used to anchor the key pattern.
region_code = u"|".join(BID_REGION_CODE_TABLE.values())
# Matches S3 keys shaped like "<region>/.../<YYYY>/<M>/<D>/<name>.json".
# FIX: the original pattern used bare "\d" / "\." inside a non-raw string
# literal — a DeprecationWarning since Python 3.6 and a SyntaxError in 3.12.
# Doubling the backslashes yields the byte-identical regex on all versions.
reg_key = re.compile(u"^(?:%s)/.*/\\d{4}/\\d{1,2}/\\d{1,2}/.*\\.json" %
                     region_code)
s3util = aws_s3_sc.S3Util()

logger = get_logger(__file__)


def search_item():
    paginator = client.get_paginator('list_objects')

    prefixes = [
        'gan_su', 'guang_dong', 'guang_xi', 'gui_zhou', 'hai_nan', 'he_bei',
        'he_nan', 'hei_long_jiang', 'hu_bei', 'hu_nan', 'ji_lin', 'jiang_su',
        'jiang_xi', 'liao_ning', 'nei_meng_gu', 'ning_xia', 'qing_hai',
        'quan_guo', 'shan_dong', 'shan_xi-', 'shan_xi_', 'shang_hai',
        'si_chuan', 'tai_wan', 'tian_jin', 'xi_zhang', 'xiang_gang',
        'xing_jiang', 'yun_nan', 'zhe_jiang'
    ]

    count = 0
Example #3
0
import json
import copy
import tornado.web
import tornado.ioloop
from tornado.concurrent import run_on_executor
from tornado.web import HTTPError
from concurrent.futures import ThreadPoolExecutor

from scpy.threadlocal import set_threadlocal
from scpy.logger import get_logger

import uuid
from utils.pgutil import PgUtil
from db import postgresql_sc

logger = get_logger()

# Shared helpers used by the Tornado request handlers below.
PG = PgUtil()
# Upper bound on concurrently running handler tasks; consumed by the
# ThreadPoolExecutor created on InfoHandler.
MAX_WORKERS = 512

database = postgresql_sc.PostgresqlUtil()
# Field whitelist for announcement records (presumably the columns exposed
# by the query API — confirm against the handler code that uses it).
announce_keys = [
    "purchaser", "purchase_agent", "purchase_category", "title", "url",
    "region", "published_ts", "announced_ts", "winning_ts", "announce_type",
    "amount", "unit", "currency"
]
# Field whitelist for winning-bid result records.
result_keys = ["winning_company", "winning_amount", "unit", "currency"]


class InfoHandler(tornado.web.RequestHandler):
    executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)