コード例 #1
0
def crawl_plat_first_letter(shuju_date="2020-01-062020-01-12"):
    """Fetch platform trading data and store each platform's first letter.

    Source page: https://shuju.wdzj.com/platdata-1.html

    Args:
        shuju_date: Value posted as the ``shujuDate`` form field.

    Raises:
        CrawlFailed: If the endpoint does not answer with HTTP 200.
    """
    url = "https://shuju.wdzj.com/plat-data-custom.html"
    form_data = {
        "type": 0,
        "shujuDate": shuju_date
    }
    response = requests.post(url, data=form_data)
    if response.status_code != 200:
        print("crawl failed. (status is not 200)")
        raise CrawlFailed('crawl failed')
    for plat_data in response.json():
        # Rows whose wdzjPlatId is 0 are never written, so skip them before
        # opening a session.
        if plat_data.get('wdzjPlatId') == 0:
            continue
        plat_id = plat_data.get('platId')
        session = DBSession()
        try:
            product = session.query(Product).filter_by(plat_id=plat_id).first()
            # first() returns None when no Product matches this plat_id.
            if product is not None:
                product.first_letter = plat_data.get('firstLetter')
                session.commit()
        finally:
            # Release the session even if the query or commit raises.
            session.close()
コード例 #2
0
class BaseHandler(RequestHandler, ValidationMixin):
    """Handler base class that opens a DBSession per request."""

    def prepare(self):
        """Open the database session and store it on ``self.db``."""
        self.db = DBSession()

    def on_finish(self):
        """Close the session stored on ``self.db``, if one was opened."""
        # prepare() may not have run (or may have failed) before on_finish.
        db = getattr(self, 'db', None)
        if db is not None:
            db.close()

    def get_current_user(self):
        """Return None; no user lookup is implemented in this base class."""
        pass

    @property
    def host(self):
        """Return ``<protocol>://<host>`` for the current request.

        When ``options.env`` equals ``config.ENV_LOCAL`` the host is built
        from the ``en0`` interface address plus ``options.port``; otherwise
        ``self.request.host`` is used.
        """
        if options.env == config.ENV_LOCAL:
            # NOTE(review): index 2 is presumably netifaces.AF_INET, and
            # options.port must be a str for this concatenation -- confirm.
            host = netifaces.ifaddresses(
                'en0')[2][0]['addr'] + ":" + options.port
        else:
            host = self.request.host
        return self.request.protocol + "://" + host

    def load_json(self):
        """Load JSON from the request body and store them in
        self.request.arguments, like Tornado does by default for POSTed form
        parameters.

        If JSON cannot be decoded, raises an HTTPError with status 400.
        """
        try:
            arguments = json.loads(self.request.body)
        except (TypeError, ValueError):
            # json decode errors (and undecodable bytes) are ValueError
            # subclasses; anything else is a real bug and should surface.
            raise HTTPError(400, "Problems parsing JSON")
        self.request.arguments = arguments
        logging.info("request arguments:{}".format(self.request.arguments))
コード例 #3
0
ファイル: app.py プロジェクト: HuuBaa/huublog
def create_blog_api():
    """Create a blog post from the submitted form (admin users only).

    Returns:
        ``'ok'`` after the blog row is committed, or a redirect to the
        ``login`` endpoint when there is no user or the user is not an admin.
    """
    user = g.get('user', None)
    if user is None or not user.admin:
        return redirect(url_for('login'))

    # Read the form fields in the same order as before; a missing field
    # raises from request.form[...] before any session is opened.
    field_names = ('name', 'summary', 'content',
                   'user_id', 'user_name', 'user_image')
    values = {}
    for field in field_names:
        values[field] = request.form[field].encode('utf8')

    sess = DBSession()
    try:
        sess.add(Blogs(**values))
        sess.commit()
    finally:
        # Close the session even when the commit fails.
        sess.close()
    return 'ok'
コード例 #4
0
 def _cleanup(self):
     """Delete Data rows whose age (now - timestamp) exceeds TIMEOUT_THRESHOLD."""
     now = time()
     session = DBSession()
     try:
         session.query(Data).filter(
             now - Data.timestamp > TIMEOUT_THRESHOLD).delete()
         session.commit()
     finally:
         # Close the session even when the delete or commit raises.
         session.close()
コード例 #5
0
def _plat_overview_text(html, primary_xpath, fallback_xpath):
    """Return the stripped text of the first node matched by either XPath."""
    try:
        return html.xpath(primary_xpath)[0].text.strip()
    except IndexError:
        # The primary layout is absent; try the alternate page layout.
        return html.xpath(fallback_xpath)[0].text.strip()


def crawl_plat_overview(first_letter):
    """Crawl one platform's overview page and store it as a PlatOverview row.

    Args:
        first_letter: Path segment inserted into
            ``https://www.wdzj.com/dangan/{first_letter}/``.

    Returns:
        None. Returns early, without writing, on a non-200 response.

    Raises:
        AttributeError, IndexError: Re-raised after logging when an expected
            page element is missing.
    """
    url = "https://www.wdzj.com/dangan/{first_letter}/".format(
        first_letter=first_letter
    )
    print("crawl plat {}".format(first_letter))
    response = requests.get(url, headers=HEADERS)
    if response.status_code != 200:
        print('crawl failed: code: {}, url: {}'.format(response.status_code, url))
        return
    encode_response(response)
    html = etree.HTML(response.text)
    try:
        # NOTE(review): in XPath "a/h1|h2" the union binds last, so "h2" is
        # evaluated relative to the root, not the title div -- confirm intent.
        plat_name = html.xpath("//div[@class='title']/h1|h2")[0].text
        print("plat name: {}".format(plat_name))

        # Registered capital (paid-in capital), bank custody, bid guarantee.
        zczj = _plat_overview_text(
            html,
            "//div[@class='zzfwbox']/dl[1]/dd[1]//div[@class='r']",
            "// div[ @class ='bgbox-bt zzfwbox'] // dl[1] / dd[1] // div[@ class ='r']",
        ).split()
        if len(zczj) == 2:
            zczj_value, sjzj = zczj
            sjzj_value = sjzj.strip('()').split(':')[1]
        else:
            # Previously the whole token list was stored here; join it so the
            # value is a string in both branches.
            zczj_value = ' '.join(zczj)
            sjzj_value = '-'

        yhcg_value = _plat_overview_text(
            html,
            "//div[@class='zzfwbox']/dl[1]/dd[2]//div[@class='r']",
            "//div[ @class ='bgbox-bt zzfwbox']//dl[1]/dd[2]//div[@class='r']",
        )
        tbbz_value = _plat_overview_text(
            html,
            "//div[@class='zzfwbox']/dl[2]/dd[3]//div[@class='r']",
            "//div[ @class ='bgbox-bt zzfwbox']//dl[2]/dd[3]//div[@class='r']",
        )

        plat_overview = dict(
            plat_name=plat_name,
            zhucezijin=zczj_value,
            shijiaozijin=sjzj_value,
            yinhangcunguan=yhcg_value,
            toubiaobaozhang=tbbz_value
        )
    except (AttributeError, IndexError) as ex:
        print('crawl failed: ex: {}, url: {}'.format(str(ex), url))
        raise
    new_plat_overview = PlatOverview(**plat_overview)
    session = DBSession()
    try:
        session.add(new_plat_overview)
        session.commit()
    finally:
        # Close the session even when the commit fails.
        session.close()
コード例 #6
0
def emailExists(email):
    """Return True if a User row has ``user_email`` equal to *email*."""
    mySession = DBSession()
    try:
        result = mySession.query(User).filter_by(user_email=email).first()
        return result is not None
    finally:
        # Close the session even when the query raises.
        mySession.close()
コード例 #7
0
def getUserPassword(user):
    """Return the decoded password of the user named *user*.

    Returns:
        ``decodeData(user_password)`` for the matching User row, or ``""``
        when no row matches.
    """
    mySession = DBSession()
    try:
        result = mySession.query(User).filter_by(user_name=user).first()
        if result is None:
            return ""
        # NOTE(review): decodeData implies the stored password is reversible;
        # confirm whether a one-way hash should be used instead.
        return decodeData(result.user_password)
    finally:
        # Close the session even when the query or decoding raises.
        mySession.close()
コード例 #8
0
def otherUserHasEmail(user, email):
    """Return True if a User other than *user* has ``user_email`` == *email*."""
    mySession = DBSession()
    try:
        result = (mySession.query(User)
                  .filter(User.user_name != user)
                  .filter_by(user_email=email)
                  .first())
        return result is not None
    finally:
        # Close the session even when the query raises.
        mySession.close()
コード例 #9
0
def userExists(user):
    """Return True if a User row has ``user_name`` equal to *user*."""
    mySession = DBSession()
    try:
        result = mySession.query(User).filter_by(user_name=user).first()
        return result is not None
    finally:
        # Close the session even when the query raises.
        mySession.close()
コード例 #10
0
def crawl_all_plats_info():
    """Crawl detailed information for every platform.

    Calls ``crawl_plat_info`` with the ``wdzj_id`` of each Product row.
    """
    session = DBSession()
    try:
        for product in session.query(Product).all():
            crawl_plat_info(product.wdzj_id)
    finally:
        # A failing crawl must not leave the session open.
        session.close()
コード例 #11
0
def getSectorList():
    """Return every Lkpsector row as ``{"code": str, "name": ...}`` dicts."""
    mySession = DBSession()
    try:
        return [
            {"code": str(row.sector_cod), "name": row.sector_name}
            for row in mySession.query(Lkpsector).all()
        ]
    finally:
        # Close the session even when the query raises.
        mySession.close()
コード例 #12
0
def getUserWithKey(key):
    """Return the ``user_name`` whose ``user_apikey`` equals *key*, or ``""``."""
    mySession = DBSession()
    try:
        result = mySession.query(User).filter_by(user_apikey=key).first()
        return "" if result is None else result.user_name
    finally:
        # Close the session even when the query raises.
        mySession.close()
コード例 #13
0
def crawl_all_plats_overview():
    """Crawl overview information for every platform.

    Calls ``crawl_plat_overview`` with the ``first_letter`` of each Product
    row.
    """
    session = DBSession()
    try:
        for product in session.query(Product).all():
            crawl_plat_overview(product.first_letter)
    finally:
        # A failing crawl must not leave the session open.
        session.close()
コード例 #14
0
def getCountryName(cnty_cod):
    """Return ``cnty_name`` of the Lkpcountry row with *cnty_cod*, or ``""``."""
    mySession = DBSession()
    try:
        result = mySession.query(Lkpcountry).filter_by(cnty_cod=cnty_cod).first()
        return "" if result is None else result.cnty_name
    finally:
        # Close the session even when the query raises.
        mySession.close()
コード例 #15
0
def getSectorName(sector_cod):
    """Return ``sector_name`` of the Lkpsector row with *sector_cod*, or ``""``."""
    mySession = DBSession()
    try:
        result = mySession.query(Lkpsector).filter_by(sector_cod=sector_cod).first()
        return "" if result is None else result.sector_name
    finally:
        # Close the session even when the query raises.
        mySession.close()
コード例 #16
0
 def getAPIKey(self):
     """Return ``user_apikey`` of the row whose user_name is ``self.login``, or ``""``."""
     mySession = DBSession()
     try:
         result = mySession.query(userModel).filter_by(user_name=self.login).first()
         return "" if result is None else result.user_apikey
     finally:
         # Close the session even when the query raises.
         mySession.close()
コード例 #17
0
def getUserData(user):
    """Build a ``User`` object for the active account named *user*.

    Returns:
        A ``User`` built from the matching row (with an empty string as the
        second constructor argument), or None when no row with
        ``user_active == 1`` matches.
    """
    mySession = DBSession()
    try:
        result = (mySession.query(userModel)
                  .filter_by(user_name=user)
                  .filter_by(user_active=1)
                  .first())
        if result is None:
            return None
        return User(result.user_name, "", result.user_fullname,
                    result.user_organization, result.user_email,
                    result.user_cnty, result.user_sector, result.user_about)
    finally:
        # Close the session even when the query raises.
        mySession.close()
コード例 #18
0
def getAPILog(requestID):
    """Return ``log_id`` of the Apilog row with ``log_uuid`` == *requestID*, or ``""``."""
    mySession = DBSession()
    try:
        result = mySession.query(Apilog).filter_by(log_uuid=requestID).first()
        return "" if result is None else result.log_id
    finally:
        # Close the session even when the query raises.
        mySession.close()
コード例 #19
0
ファイル: app.py プロジェクト: samistart/med-clock
def get_patient_id(mac_address):
    """Return, as JSON, the id of the patient with the given MAC address.

    The address is upper-cased before the lookup. The JSON payload is
    ``null`` when no patient matches.
    """
    session = DBSession()
    try:
        patient = session.query(Patient).filter_by(
            mac_address=mac_address.upper()).first()
        patient_id = patient.id if patient is not None else None
    finally:
        # Close the session even when the query raises.
        session.close()
    return jsonify(patient_id)
コード例 #20
0
 def save_data(self, data):
     """Store a copy of *data*, stamped with the current time, as a Data row.

     The caller's dict is not modified; ``timestamp`` is set on a copy.
     """
     record_fields = data.copy()
     record_fields['timestamp'] = time()
     session = DBSession()
     try:
         session.add(Data(**record_fields))
         session.commit()
     finally:
         # Close the session even when the commit fails.
         session.close()
コード例 #21
0
def getAPIInfo(logID):
    """Return ``log_datetime`` and ``log_ip`` of the Apilog row *logID*.

    Returns:
        A dict with keys ``log_datetime`` and ``log_ip``, or an empty dict
        when no row has ``log_id`` equal to *logID*.
    """
    mySession = DBSession()
    try:
        result = mySession.query(Apilog).filter_by(log_id=logID).first()
        if result is None:
            return {}
        return {"log_datetime": result.log_datetime, "log_ip": result.log_ip}
    finally:
        # Close the session even when the query raises.
        mySession.close()
コード例 #22
0
ファイル: app.py プロジェクト: samistart/med-clock
def update_patient(id):
    """Apply the JSON body's key/value pairs to the patient with this id.

    Raises whatever ``Query.one()`` raises when the patient lookup does not
    yield exactly one row.
    """
    # get_json(silent=True) yields None for a missing or invalid body;
    # treat that as "no changes" instead of crashing.
    content = request.get_json(silent=True) or {}
    session = DBSession()
    try:
        patient = session.query(Patient).filter_by(id=id).one()
        # NOTE(review): keys come straight from the request body, so any
        # attribute of Patient can be overwritten -- consider a whitelist.
        for key, value in content.items():
            setattr(patient, key, value)
        session.add(patient)
        session.commit()
    finally:
        # Close the session even when the lookup or commit fails.
        session.close()
    return jsonify()
コード例 #23
0
ファイル: action.py プロジェクト: blockmov/BMTF
    def fetch_action(cls, id):
        """Return ``to_dict()`` of the Action with this id, or ``{}``.

        An empty dict is returned both when no Action matches and when the
        lookup raises IntegrityError (which is logged).
        """
        session = DBSession()
        try:
            action = session.query(Action).filter(Action.id == id).first()
            # first() returns None when nothing matches; calling to_dict()
            # on it used to raise AttributeError.
            if action is None:
                return {}
            return action.to_dict()
        except IntegrityError as error:
            logging.error(error)
            return {}
        finally:
            # Close the session on every path, including the error path.
            session.close()
コード例 #24
0
def addToLog(log_user, log_type, log_message):
    """Add an Activitylog entry; failures are swallowed (best effort)."""
    newLog = Activitylog(log_user, log_type, log_message)
    mySession = DBSession()
    try:
        transaction.begin()
        mySession.add(newLog)  # Add the new log to MySQL
        transaction.commit()
    except Exception:
        # Logging is best effort: abort and carry on, but let
        # KeyboardInterrupt/SystemExit propagate.
        transaction.abort()
    finally:
        mySession.close()
コード例 #25
0
ファイル: app.py プロジェクト: HuuBaa/huublog
def before_req():
    """Populate ``g.user`` from the ``huusession`` cookie, if present.

    The text before the first ``-`` in the cookie is used as the user id.
    ``g.user`` becomes the matching Users row, or None when the lookup
    fails. When there is no cookie or no id, ``g.user`` is left untouched.
    """
    cookie = request.cookies.get('huusession')
    if not cookie:
        return
    user_id = cookie.split('-')[0]
    if not user_id:
        return
    # NOTE(review): only the id part of the cookie is used; the remainder is
    # never checked here, so a forged cookie is accepted -- confirm and fix.
    sess = DBSession()
    try:
        g.user = sess.query(Users).filter(Users.id == user_id).one()
    except Exception:
        g.user = None
    finally:
        # Close the session even when one() raises.
        sess.close()
コード例 #26
0
def getCountryList():
    """Return every Lkpcountry row as ``{"code": ..., "name": ...}`` dicts.

    A row whose name cannot be converted is listed with name ``"Unknown"``.
    """
    countries = []
    mySession = DBSession()
    try:
        for result in mySession.query(Lkpcountry).all():
            try:
                # NOTE(review): this conversion chain is Python 2 specific
                # (``unicode``); on Python 3 every row falls back to
                # "Unknown" -- confirm the target interpreter.
                name = unicode(result.cnty_name.decode("cp1252").encode("utf-8"))
            except Exception:
                name = "Unknown"
            countries.append({"code": result.cnty_cod, "name": name})
    finally:
        # Close the session even when the query raises.
        mySession.close()
    return countries
コード例 #27
0
def getStats(currUser=None):
    """Return summary counters.

    Args:
        currUser: Accepted for compatibility; it does not affect the result.

    Returns:
        A dict with ``totUsers`` (count of User rows) and ``totFeeds`` /
        ``totModels`` (both always 0).
    """
    data = {}
    mySession = DBSession()
    try:
        data["totUsers"] = mySession.query(User).count()
    finally:
        # Close the session even when the count query raises.
        mySession.close()
    # Both branches of the former ``currUser`` check stored the same zeros.
    data["totFeeds"] = 0
    data["totModels"] = 0
    return data
コード例 #28
0
def changeUserPassword(user, password):
    """Store ``encodeData(password)`` as the password of the user *user*.

    Returns:
        True when the transaction commits, False when anything fails (the
        transaction is aborted in that case).
    """
    mySession = DBSession()
    try:
        transaction.begin()
        mySession.query(User).filter_by(user_name=user).update(
            {"user_password": encodeData(password)})
        transaction.commit()
        return True
    except Exception:
        transaction.abort()
        return False
    finally:
        mySession.close()
コード例 #29
0
ファイル: action.py プロジェクト: blockmov/BMTF
    def update_action_exist(cls, id, url):
        """Return True if an Action other than *id* already uses *url*.

        Returns False when none exists or when the lookup raises
        IntegrityError (which is logged).
        """
        session = DBSession()
        try:
            actions = session.query(Action).filter(Action.url == url).filter(
                Action.id != id).all()
            return bool(actions)
        except IntegrityError as error:
            logging.error(error)
            return False
        finally:
            # Close the session on every path, including the error path.
            session.close()
コード例 #30
0
def addAPILog(ipaddress, user, requestID, inputData):
    """Add an Apilog entry.

    Returns:
        True when the transaction commits, False when anything fails (the
        transaction is aborted in that case).
    """
    newApilog = Apilog(ipaddress, user, requestID, inputData)
    mySession = DBSession()
    try:
        transaction.begin()
        mySession.add(newApilog)
        transaction.commit()
        return True
    except Exception:
        transaction.abort()
        return False
    finally:
        mySession.close()
コード例 #31
0
ファイル: shuffle.py プロジェクト: velpavel/SecretSanta
def start_shuffle():
    """Assign gift recipients in every group that is due for shuffling.

    Selects active groups whose ``date_shuffle`` is today or earlier and
    whose ``shuffle_done`` flag is unset. For each group:

    * no members: mark it done and notify the owner;
    * one member: mark it done and notify the owner and the member;
    * otherwise: shuffle the members, link each one to the next (the last
      to the first), commit, then message every member and the owner.

    A "Shuffle done" entry is written via ``save_to_log`` at the end.
    """
    random.seed()
    session = DBSession()
    try:
        # ``== False`` / ``== True`` build SQL expressions here; they must
        # not be rewritten as ``is`` / ``not``.
        groups = session.query(Group).filter(
            Group.date_shuffle <= datetime.datetime.now().date(),
            Group.shuffle_done == False, Group.active == True).all()
        for group in groups:
            if len(group.members) == 0:
                group.shuffle_done = True
                session.commit()
                send_message(
                    group.owner.telegramid,
                    'К сожалению, в вашей группе "{}" нет ни одного участника. Некому высылать подарки'
                    .format(group.name))
            elif len(group.members) == 1:
                group.shuffle_done = True
                session.commit()
                send_message(
                    group.owner.telegramid,
                    'К сожалению в вашей группе "{}" всего один учатник. Некому высылать подарки.'
                    .format(group.name))
                send_message(
                    group.members[0].user.telegramid,
                    'Вы единственный участник группы {}. Подарите себе что-нибудь приятное'
                    .format(group.name))
            elif len(group.members) > 1:
                member_list = group.members[:]
                random.shuffle(member_list)
                # Close the ring: the last member gives to the first.
                member_list[-1].send_to = member_list[0].user
                for i in range(len(member_list) - 1):
                    member_list[i].send_to = member_list[i + 1].user
                group.shuffle_done = True
                # Persist the assignment before any notification goes out.
                session.commit()
                for member in group.members:
                    to_member = session.query(Member).filter(
                        Member.group == group,
                        Member.user == member.send_to).first()
                    text = '''Распределение получателей для группу {} завершено!\nВы Санта для {}. Пожелания к подарку: {}
    подарок высылать по следующему адресу: {} {}. На имя {}'''.format(
                        group.name, to_member.user.name, to_member.suggestions,
                        to_member.user.index, to_member.user.address,
                        to_member.user.fio)
                    send_message(member.user.telegramid, text)
                send_message(
                    group.owner.telegramid,
                    'Распределение получателей для группу {} завершено! Участников: {}.\
                              Всем участникам разосланы их получатели.'.format(
                        group.name, len(group.members)))
    finally:
        # Close the session even when a commit or a send fails.
        session.close()
    save_to_log('system', comment_text="Shuffle done")
コード例 #32
0
ファイル: action.py プロジェクト: blockmov/BMTF
    def add(cls, name, url):
        """Insert a new Action built from *name* and *url*.

        Returns:
            True when the commit succeeds, False when it raises
            IntegrityError (which is logged).
        """
        session = DBSession()
        try:
            session.add(Action(name, url))
            session.commit()
            logging.info('add action success<name=%s, url=%s>' % (name, url))
            return True
        except IntegrityError as error:
            logging.error(str(error))
            return False
        finally:
            # Previously the session stayed open when the commit failed.
            session.close()
コード例 #33
0
ファイル: app.py プロジェクト: HuuBaa/huublog
def delete_blog_api():
    """Delete the blog whose id is given by the ``id`` query argument.

    Returns:
        ``'ok'`` after the delete is committed, or a redirect to the
        ``login`` endpoint when there is no user or the user is not an admin.
    """
    user = g.get('user', None)
    blog_id = request.args.get('id', '')
    if user is None or not user.admin:
        return redirect(url_for('login'))
    sess = DBSession()
    try:
        sess.query(Blogs).filter(Blogs.id == blog_id).delete()
        sess.commit()
    finally:
        # Close the session even when the delete or commit fails.
        sess.close()
    return 'ok'
コード例 #34
0
ファイル: app.py プロジェクト: samistart/med-clock
def create_patient():
    """Create a Patient and return its id as JSON.

    When the JSON body contains ``mac_address`` it is stored upper-cased.
    """
    content = request.get_json(silent=True)
    session = DBSession()
    try:
        # Keep attributes loaded after the commit so the generated id can be
        # read without another query.
        session.expire_on_commit = False
        patient = Patient()
        if content and ("mac_address" in content):
            patient.mac_address = content["mac_address"].upper()
        session.add(patient)
        session.commit()
        patient_id = patient.id
    finally:
        # Close the session even when the commit fails.
        session.close()
    return jsonify(patient_id)
コード例 #35
0
def updateProfile(user, data):
    """Update the profile columns of the user named *user* from *data*.

    Args:
        user: Value matched against ``user_name``.
        data: Mapping providing ``user_fullname``, ``user_organization``,
            ``user_email``, ``user_cnty``, ``user_sector`` and ``user_about``.

    Returns:
        True when the transaction commits; False when anything fails (the
        error is printed and the transaction aborted).
    """
    profile_fields = ("user_fullname", "user_organization", "user_email",
                      "user_cnty", "user_sector", "user_about")
    mySession = DBSession()
    try:
        transaction.begin()
        changes = {}
        for field in profile_fields:
            changes[field] = data[field]
        mySession.query(User).filter_by(user_name=user).update(changes)
        transaction.commit()
        return True
    except Exception as e:
        # ``as`` and print(...) parse on both Python 2.6+ and Python 3.
        print(str(e))
        transaction.abort()
        return False
    finally:
        mySession.close()
コード例 #36
0
def addUser(userData):
    """Create a User from *userData* with a freshly generated uuid4 string.

    The password is passed through ``encodeData`` before being stored.

    Returns:
        ``(True, "")`` when the transaction commits, otherwise
        ``(False, <error text>)`` after aborting the transaction.
    """
    mySession = DBSession()
    try:
        newUser = User(userData["user_name"], userData["user_fullname"],
                       encodeData(userData["user_password"]),
                       userData["user_organization"], userData["user_email"],
                       str(uuid.uuid4()), userData["user_cnty"],
                       userData["user_sector"], "")
    except Exception:
        # Building the row failed before any transaction began; release the
        # session and let the error propagate as it did before.
        mySession.close()
        raise
    try:
        transaction.begin()
        mySession.add(newUser)  # Add the new user to MySQL
        transaction.commit()
        return True, ""
    except Exception as e:
        # ``as`` parses on both Python 2.6+ and Python 3.
        transaction.abort()
        return False, str(e)
    finally:
        mySession.close()
コード例 #37
0
ファイル: bihu_helper.py プロジェクト: jiffies/bihu-assistant
 def vote_article(self, artId):
     """Post a vote for *artId* and store the API's answer as a VoteRecord.

     The record keeps the ``res`` and ``resMsg`` fields of the JSON reply.
     """
     data = {
         "userId": self.userId,
         "accessToken": self.accessToken,
         "artId": artId
     }
     # NOTE(review): verify=False turns off TLS certificate verification for
     # a request that carries the access token -- confirm this is intended.
     r = requests.post(VOTE_ARTICLE_API, data, verify=False)
     result = r.json()
     vote = VoteRecord(artId=artId,
                       result=result['res'],
                       message=result['resMsg'])
     session = DBSession()
     try:
         session.add(vote)
         session.commit()
     finally:
         # Close the session even when the commit fails.
         session.close()
コード例 #38
0
def save_to_log(from_who='user',
                message_type=None,
                message=None,
                comment_text='',
                msg_text=''):
    """Write one entry to the log table. Pass ``from_who`` carefully.

    Args:
        from_who: ``'bot'``, ``'user'`` or ``'system'`` -- who the message is
            from. Any other value is appended to ``comment_text`` and the
            entry is stored with ``from_who='need_help'``.
        message_type: Stored as the entry's ``msg_type``.
        message: The message object received from the user, if any.
        comment_text: Additional free text.
        msg_text: Message text; use it to store the bot's reply to a user
            message. For user messages of content type ``text`` or
            ``contact`` it is taken from ``message`` instead.

    Examples:
        save_to_log('user', message=message) -- a message from the user.
        save_to_log('system', comment_text=err_text) -- a system message,
            e.g. about an error.
        save_to_log('bot', message=message_from_user, msg_text=bot_msg_text)
            -- the bot's reply to a user.
    """
    if from_who not in ('bot', 'user', 'system'):
        comment_text += ' ' + from_who
        from_who = 'need_help'
    operation = None
    tid = None
    session = DBSession()
    try:
        if message:
            tid = message.from_user.id
            if from_who == 'user':
                if message.content_type == 'text':
                    msg_text = message.text
                if message.content_type == 'contact':
                    msg_text = str(message.contact)

            operation = session.query(Operation).filter_by(telegramid=tid).first()

        if operation is None:
            # No stored operation: use a blank one for the fields below.
            operation = Operation()
        log = Log(datetime=datetime.datetime.now(),
                  from_who=from_who,
                  user_id=tid,
                  msg_text=msg_text,
                  msg_type=message_type,
                  operation=operation.current_operation,
                  status=operation.operation_status,
                  additional_info=operation.additional_info_db,
                  # Name of the function that called save_to_log; this must
                  # stay in this frame for index 1 to be the caller.
                  function=inspect.stack()[1][3],
                  comment=comment_text)
        session.add(log)
        session.commit()
    finally:
        # Close the session even when the lookup or commit fails.
        session.close()
コード例 #39
0
ファイル: action.py プロジェクト: blockmov/BMTF
    def delete(cls, id):
        """Delete the Action with this id.

        Returns:
            True when the row is deleted and committed; False when no Action
            matches or the commit raises IntegrityError (both are logged).
        """
        session = DBSession()
        try:
            actions = session.query(Action).filter(Action.id == id).all()
            if not actions:
                logging.error('Action not found')
                return False
            session.delete(actions[0])
            session.commit()
            return True
        except IntegrityError as error:
            logging.error(error)
            return False
        finally:
            # Previously the session stayed open on the not-found and
            # error paths.
            session.close()
コード例 #40
0
def crawl_problem_plats():
    """Crawl the problem-platform list and store one ProblemPlat per entry.

    Source page: https://shuju.wdzj.com/problem-1.html

    For entries whose ``wdzjPlatId`` is not 0, a ``products`` row is also
    inserted when no row with that ``plat_id`` exists yet.
    """
    url = "https://shuju.wdzj.com/problem-list-all.html"
    params = {"year": ""}
    response = requests.get(url, params=params, headers=HEADERS)
    json_data = response.json()
    # A payload without 'problemList' is treated as an empty list.
    problem_plats = json_data.get('problemList') or []

    # Values are bound as parameters instead of being formatted into the
    # statement: platform names come from the remote site and may contain
    # quotes.
    insert_missing_product = """
                INSERT INTO products
                    (plat_id, wdzj_id, name)
                    select
                    :plat_id, :wdzj_id, :plat_name
                WHERE not EXISTS (SELECT *
                    FROM products
                    WHERE plat_id = :plat_id);
                """

    for problem_plat in problem_plats:
        plat_id = problem_plat.get('platId')
        wdzj_id = problem_plat.get('wdzjPlatId')
        plat_name = problem_plat.get('platName')
        session = DBSession()
        try:
            if wdzj_id != 0:
                session.execute(
                    insert_missing_product,
                    # str() keeps the values textual, as the former quoted
                    # literals were.
                    {
                        "plat_id": str(plat_id),
                        "wdzj_id": str(wdzj_id),
                        "plat_name": str(plat_name),
                    }
                )
            new_problem_plat = ProblemPlat(
                plat_id=plat_id,
                wdzj_id=wdzj_id,
                plat_name=plat_name,
                area=problem_plat.get('area'),  # region
                oneline_time=problem_plat.get('onlineTime'),  # launch time
                problem_date=problem_plat.get('problemTime'),  # problem time
                event_type=problem_plat.get('type'),  # event type
                people_num=problem_plat.get('peopleNumber'),
                status1=problem_plat.get('status1'),  # reserved field status1
                status2=problem_plat.get('status2')  # reserved field status2
            )
            session.add(new_problem_plat)
            session.commit()
        finally:
            # Close the session even when the insert or commit fails.
            session.close()
0
ファイル: app.py プロジェクト: HuuBaa/huublog
def authenticate_api():
    """Check the submitted email/password and set the session cookie.

    Returns:
        ``'emailError'`` when the user lookup fails, ``'passwordError'``
        when the SHA-1 of ``email:passwd`` differs from the stored value,
        otherwise a response carrying the ``huusession`` cookie
        (``<user id>-<sha1 of "id-email-passwd">``).
    """
    email = request.form['email'].encode('utf8')
    passwd = request.form['passwd'].encode('utf8')
    sess = DBSession()
    try:
        user = sess.query(Users).filter(Users.email == email).one()
    except Exception:
        return 'emailError'
    finally:
        # Previously the session stayed open on the 'emailError' return.
        sess.close()
    s = '%s:%s' % (email, passwd)
    passwd = hashlib.sha1(s.encode('utf8')).hexdigest()
    if passwd != user.passwd:
        return 'passwordError'
    cookie_str = '%s-%s-%s' % (user.id, user.email, user.passwd)
    cookie_parts = [user.id, hashlib.sha1(cookie_str.encode('utf8')).hexdigest()]
    resp = make_response()
    resp.set_cookie('huusession', '-'.join(cookie_parts))
    return resp
0
ファイル: app.py プロジェクト: HuuBaa/huublog
def queryAllDesc(table_class, offset=None, limit=None):
    """Return rows of *table_class*, newest first, as a list of dicts.

    Args:
        table_class: Mapped class with ``create_at`` and ``to_dict()``.
        offset: Rows to skip; applied together with *limit* unless both
            are None.
        limit: Maximum number of rows.

    Returns:
        ``to_dict()`` of every row, with ``passwd`` set to ``'******'`` on
        each object first.
    """
    sess = DBSession()
    try:
        query = sess.query(table_class).order_by(table_class.create_at.desc())
        if offset is not None or limit is not None:
            query = query.offset(offset).limit(limit)
        rows = query.all()
    finally:
        # Close the session even when the query raises.
        sess.close()

    # Mask after the session is closed so the change is never flushed.
    for row in rows:
        row.passwd = '******'
    return [row.to_dict() for row in rows]
コード例 #43
0
def getUserInfo(userid):
    """Return profile fields, country name and sector name for *userid*.

    Returns:
        A dict with keys ``user_fullname``, ``user_organization``,
        ``user_email``, ``user_about``, ``cnty_name`` and ``sector_name``;
        empty when no row matches. If several rows match, the last wins.
    """
    # The user name is bound as a parameter; concatenating it into the
    # statement allowed SQL injection.
    sql = "SELECT user_fullname, user_organization, user_email, user_about,lkpcountry.cnty_name,lkpsector.sector_name FROM " \
          "user,lkpcountry,lkpsector WHERE user_cnty = lkpcountry.cnty_cod AND user_sector = lkpsector.sector_cod AND user_name = :user_name"

    columns = ("user_fullname", "user_organization", "user_email",
               "user_about", "cnty_name", "sector_name")
    userInfo = {}
    mySession = DBSession()
    try:
        results = mySession.execute(sql, {"user_name": userid})
        for result in results:
            for position, column in enumerate(columns):
                userInfo[column] = result[position]
    finally:
        # Close the session even when the query raises.
        mySession.close()
    return userInfo
コード例 #44
0
ファイル: app.py プロジェクト: HuuBaa/huublog
def create_comments_api():
    """Store a comment built from the submitted form.

    Returns:
        ``'notlogin'`` when ``g`` holds no user, otherwise ``'ok'`` after
        the comment is committed.
    """
    user = g.get('user', None)
    if user is None:
        return 'notlogin'

    # NOTE(review): user_id / user_name / user_image are taken from the
    # form rather than from the logged-in user -- confirm this is intended.
    comment = Comments(blog_id=request.form['blog_id'],
                       user_id=request.form['user_id'],
                       user_name=request.form['user_name'],
                       user_image=request.form['user_image'],
                       content=request.form['content'])
    sess = DBSession()
    try:
        sess.add(comment)
        sess.commit()
    finally:
        # Close the session even when the commit fails.
        sess.close()
    return 'ok'
コード例 #45
0
def crawl_plat_detail(plat_id):
    """Crawl a platform's index detail page and store it as a PlatDetail row.

    Page: https://www.wdzj.com/zhishu/detail-{plat_id}.html

    Texts of the ``.fr .xlist li div`` elements are reversed and paired up
    as key/value; keys are converted to pinyin and used as PlatDetail
    keyword arguments, together with ``plat_id`` and ``plat_name``.

    Raises:
        CrawlFailed: On a non-200 response.
        AttributeError, IndexError: Re-raised after logging when parsing
            fails.
    """
    url = "https://www.wdzj.com/zhishu/detail-{plat_id}.html".format(
        plat_id=plat_id
    )
    print("crawl plat {}".format(plat_id))
    response = requests.get(url, headers=HEADERS)
    if response.status_code != 200:
        print('crawl failed: code: {}, url: {}'.format(response.status_code, url))
        raise CrawlFailed('crawl failed!')
    encode_response(response)
    html = BeautifulSoup(response.text, features='lxml')
    x_html = etree.HTML(response.text)
    try:
        # NOTE(review): in XPath "a/h1|h2" the union binds last, so "h2" is
        # evaluated relative to the root, not the title div -- confirm intent.
        plat_name = x_html.xpath("//div[@class='title']/h1|h2")[0].text
        # Drop nested tags so only each div's own text remains.
        # NOTE(review): children are extracted while being iterated; with
        # adjacent tags this may skip some -- verify against real pages.
        for div in html.select('.fr .xlist li div'):
            for child in div.children:
                if isinstance(child, Tag):
                    child.extract()
        texts = list(reversed([div.text.strip() for div in html.select('.fr .xlist li div')]))
    except (AttributeError, IndexError) as ex:
        print('crawl failed: ex: {}, url: {}'.format(str(ex), url))
        raise
    results = dict(zip(texts[0::2], texts[1::2]))
    # Convert the Chinese keys to pinyin.
    trans_results = {}
    for k, v in results.items():
        trans_results[''.join(lazy_pinyin(k))] = v
    trans_results['plat_id'] = plat_id
    trans_results['plat_name'] = plat_name
    new_detail = PlatDetail(**trans_results)
    session = DBSession()
    try:
        session.add(new_detail)
        session.commit()
    finally:
        # Close the session even when the commit fails.
        session.close()
コード例 #46
0
ファイル: app.py プロジェクト: HuuBaa/huublog
def edit_blog():
    """Render the blog edit page for admin users.

    Redirects to ``manage_blogs`` when no ``id`` query argument is given and
    to ``login`` when there is no user or the user is not an admin. The
    template receives ``blog=None`` when the lookup fails.
    """
    blog_id = request.args.get('id', '')
    if not blog_id:
        return redirect(url_for('manage_blogs'))

    sess = DBSession()
    try:
        blog = sess.query(Blogs).filter(Blogs.id == blog_id).one()
    except Exception:
        blog = None
    finally:
        # Close the session even when one() raises.
        sess.close()

    user = g.get('user', None)
    if user is None or not user.admin:
        return redirect(url_for('login'))
    return render_template('manage_blogs_edit.html',
                           user=user,
                           blog=blog)
コード例 #47
0
ファイル: action.py プロジェクト: blockmov/BMTF
    def edit(cls, id, name=None, url=None):
        """Update the name and/or url of the Action with this id.

        Args:
            id: Primary key of the Action.
            name: New name; ignored when falsy.
            url: New url; ignored when falsy.

        Returns:
            True when the commit succeeds; False when no Action matches or
            the commit raises IntegrityError (both are logged).
        """
        session = DBSession()
        try:
            action = session.query(Action).filter(Action.id == id).first()
            if not action:
                logging.error('not fount action<id=%s>' % id)
                return False
            if name:
                action.name = name
            if url:
                # Was ``action.offset = url``; the argument and the log line
                # below both refer to the ``url`` attribute.
                action.url = url
            session.commit()
            logging.info('edit action success<id=%s, name=%s, url=%s>' %
                         (action.id, action.name, action.url))
            return True
        except IntegrityError as error:
            logging.error(str(error))
            return False
        finally:
            # Previously the session stayed open on the not-found and
            # error paths.
            session.close()
コード例 #48
0
ファイル: app.py プロジェクト: HuuBaa/huublog
def get_blog(blog_id):
    """Render ``blog.html`` for one blog together with its comments.

    Returns:
        ``'can not find the blog'`` when the blog lookup fails, otherwise
        the rendered template. ``comments`` is None when the comment query
        fails.
    """
    sess = DBSession()
    try:
        blog = sess.query(Blogs).filter(Blogs.id == blog_id).one()
    except Exception:
        return 'can not find the blog'
    finally:
        # Close the session even when one() raises.
        sess.close()

    user = g.get('user', None)

    sess = DBSession()
    try:
        comments = sess.query(Comments).filter(
            Comments.blog_id == blog_id).all()
    except Exception:
        comments = None
    finally:
        sess.close()

    return render_template("blog.html",
                           blog=blog,
                           user=user,
                           comments=comments)
コード例 #49
0
def crawl_products():
    """Download the product search list and store one Product per entry.

    Each entry is committed in its own session.

    Raises:
        CrawlFailed: If the endpoint does not answer with HTTP 200.
    """
    url = "https://files.wdzjimages.com/shuju/product/search.json"
    print("crawl products...")
    response = requests.get(url)
    if response.status_code != 200:
        print("crawl failed. (status is not 200)")
        raise CrawlFailed('crawl failed')
    for product in response.json():
        new_product = Product(
            plat_id=product.get('platId'),
            name=product.get('platName'),
            old_name=product.get('oldPlatName'),
            pingyin=product.get('allPlatNamePin'),
            pin=product.get('autoPin')
        )
        session = DBSession()
        try:
            session.add(new_product)
            session.commit()
        finally:
            # Close the session even when the commit fails.
            session.close()
コード例 #50
0
def crawl_problem_plats_first_letter():
    """Copy ``firstLetter`` from the problem-platform list onto Products.

    Source page: https://shuju.wdzj.com/problem-1.html

    Entries whose ``wdzjPlatId`` is 0, and entries with no matching Product,
    are skipped.
    """
    url = "https://shuju.wdzj.com/problem-list-all.html"
    params = {"year": ""}
    response = requests.get(url, params=params, headers=HEADERS)
    json_data = response.json()
    # A payload without 'problemList' is treated as an empty list.
    problem_plats = json_data.get('problemList') or []

    for problem_plat in problem_plats:
        if problem_plat.get('wdzjPlatId') == 0:
            continue
        plat_id = problem_plat.get('platId')
        session = DBSession()
        try:
            product = session.query(Product).filter_by(plat_id=plat_id).first()
            # first() returns None when no Product matches this plat_id.
            if product is not None:
                product.first_letter = problem_plat.get('firstLetter')
                session.commit()
        finally:
            # Close the session even when the query or commit fails.
            session.close()
コード例 #51
0
def getUserLog(user,limit = True):
    """Return activity-log rows as display dictionaries, newest first.

    Args:
        user: User whose log is requested.
            NOTE(review): the query text below never references this
            argument (the ``log_user`` filter is a fixed literal) -- confirm
            how the user name is meant to be bound.
        limit: When truthy, at most 20 rows are fetched.

    Returns:
        A list of dicts with the keys ``date``, ``time``, ``type``,
        ``message``, ``alt``, ``icon`` and ``color``. ``alt`` is True for
        the 1st, 3rd, 5th ... row and False for the others.
    """
    sql = "SELECT DATE_FORMAT(DATE(log_datetime), '%%W %%D %%M %%Y') as log_date,TIME(log_datetime) as log_time,log_type,log_message,log_datetime as date1,log_datetime as date2 FROM activitylog WHERE log_user = '******' ORDER BY date1 DESC,date2 ASC,log_id desc"
    if limit:
        sql = sql + " LIMIT 20"

    # (color, icon) per log type; any other type uses the fallback pair.
    styles = {
        "PRF": ("terques", "fa-user"),
        "MOD": ("purple", "fa-gears"),
        "FED": ("blue", "fa-leaf"),
        "API": ("green", "fa-bolt"),
    }
    fallback_style = ("red", "fa-bullhorn")

    items = []
    mySession = DBSession()
    try:
        connection = mySession.connection()
        try:
            activities = connection.execute(sql)
            for position, activity in enumerate(activities, 1):
                color, icon = styles.get(activity[2], fallback_style)
                items.append({"date":activity[0],"time":activity[1],"type":activity[2],"message":activity[3],"alt":position % 2 != 0,"icon":icon,"color":color})
        finally:
            connection.close()
    finally:
        # Both handles are released even if the query or row handling fails.
        mySession.close()
    return items
コード例 #52
0
def checkLogin(user,password):
    """Check a user name / password pair against the stored users.

    Args:
        user: Value matched against ``user_name``; only rows with
            ``user_active = 1`` are considered.
        password: Value compared with the decoded stored password.

    Returns:
        True when a matching active user exists and the decoded stored
        password equals ``password``; False otherwise.
    """
    mySession = DBSession()
    try:
        result = mySession.query(userModel).filter_by(user_name = user).filter_by(user_active = 1).first()
        if result is None:
            return False
        # NOTE(review): plain equality on a decoded password; consider a
        # constant-time comparison if the value types allow it.
        return decodeData(result.user_password) == password
    finally:
        # Single exit point for cleanup, also reached when decoding raises.
        mySession.close()
コード例 #53
0
ファイル: crawl.py プロジェクト: skymoney/zhihu_crawler
def crawl_question(url, cookie, scheduler):
	#crawl question page
	#like host: http://www.zhihu.com/question/123456
	print 'start to question from url: ', url
	question_res = requests.get(url, cookies=cookie)
	with open('pages/question' + re.search(r'\d+', url).group() + '.html', 'wb') as question_file:
		question_file.write(question_res.content)

	session = DBSession()
	if question_res.status_code == 200:
		question_dom = BeautifulSoup(question_res.content)

		#find more question
		if question_dom.find("div", id="zh-question-related-questions"):
			all_related_ques = question_dom.find("div", 
				id="zh-question-related-questions").find_all("a", class_="question_link")
		
			for ques in all_related_ques:
				'''
				new_url = ques.get('href') if ques.get('href').startswith('http') \
					else 'http://www.zhihu.com/' + ques.get('href')
				'''

				if ques.get('href').startswith('http'):
					new_url = ques.get('href')
				elif ques.get('href').startswith('/'):
					new_url = 'http://www.zhihu.com' + ques.get('href')
				else:
					new_url = 'http://www.zhihu.com/' + ques.get('href')

				scheduler.add(new_url)

		#crawl data
		q_id = re.search(r'\d+', url).group()
		q_title = question_dom.find("div", 
			id="zh-question-title").find('h2').contents[0].encode('utf-8')
		q_detail = ''
		if question_dom.find('div', 
			id='zh-question-detail').find('textarea'):
			q_detail = question_dom.find('div', 
			id='zh-question-detail').find('textarea').get_text().encode('utf-8')
		else:
			q_detail = question_dom.find('div', 
				id='zh-question-detail').get_text().encode('utf-8')
		q_author = 'default'
		question = Question(q_id=q_id, title=q_title, author=q_author, content=q_detail)

		#crawl answer data
		#a_id author votes content last_modify
		answer_list = question_dom.find_all("div", class_="zm-item-answer")

		for answer in answer_list:
			a_id = answer.get('data-aid')
			author_h3 = answer.find("h3", class_="zm-item-answer-author-wrap")
			if author_h3.find_all('a') and len(author_h3.find_all('a'))>1:
				author = author_h3.find_all('a')[1].text.encode('utf-8')
			else:
				#匿名用户
				author = author_h3.text.encode('utf-8')
			

			votes_div = answer.find("div", class_="zm-votebar")

			try:
				votes_span = votes_div.find_all("span")
			except:
				print votes_div
				sys.exit(1)

			votes = votes_span[1].text if len(votes_span)>1 else 0

			'''
			content = answer.find("div", 
				class_="zm-editable-content").get_text().encode('utf-8')
			'''
			content_div = answer.find("div", class_='zm-editable-content')
			if content_div is None:
				content = "None"
			else:
				content = content_div.get_text().encode('utf-8')

			#last modify date
			try:
				last_modify_date = answer.find("a", class_='answer-date-link').text.split()[1]
				if last_modify_date.index(':') != -1:
					last_modify_date = datetime.now().strftime("%Y-%m-%d")
			except:
				last_modify_date = "2015-06-17"

			last_modify = datetime.strptime(last_modify_date, "%Y-%m-%d")

			answer_info = Answer(a_id=a_id, author=author, 
				votes=votes, content=content, last_modify=last_modify)

			session.add(answer_info)

			question.answers.append(answer_info)

		session.add(question)
		session.commit()
		session.close()

		#process_question(question)
	else:
		print "Error: ", str(question_res.status_code)
コード例 #54
0
ファイル: crawler.py プロジェクト: spartonia/addressCollector
def update_links(
    rss_feed_url
):
    """Insert new announcements from an RSS feed into the database.

    Notes:
        * Publishing date is the date the property is listed for sale. It
          might be very old.
        * All entries in the RSS feed are inserted into the database. The
          url field is unique, so duplicates are not allowed.
        * When querying new entries, keep in mind to query the date based on
          ( timestamp = 'today' & pubDate = 'close enough' ) so only newly
          listed properties are queried.
        * When crawling a page fails, its ``Link`` has already been added to
          the session; it stays pending and is written by the next
          successful commit.

    Parameters
    ----------
    rss_feed_url : str
        URL handed to ``feedparser.parse``.

    """
    feed = feedparser.parse(rss_feed_url)

    entries = feed['entries']

    num_new_links = 0

    print('Updating liks database ..')

    # Load the URLs already stored; a set keeps the per-entry duplicate
    # check O(1).
    session = DBSession()
    try:
        known_urls = {url for (url,) in session.query(Link.url)}
    finally:
        session.close()

    session = DBSession()
    try:
        browser = logged_in_browser()
        for cnt, entry in enumerate(entries):
            link = entry['link']
            published_str = entry['published']
            print(cnt, ':', link)
            if link in known_urls:
                print('duplicate url, passing..')
                continue
            published = date_parser.parse(published_str)
            # Drop the time of day: keep only the calendar date.
            pubDate = datetime.fromordinal(published.toordinal())

            new_link = Link(url=link, date=pubDate)
            session.add(new_link)
            # Random pause between page requests.
            time.sleep(random.choice(range(20, 60))/10)
            try:
                data = crawl_hemnet_page(new_link.url, browser=browser)
            except Exception as e:
                # Exceptions have no ``message`` attribute on Python 3;
                # print the exception itself.
                print('Error crawling hemnet page.', e)
                continue
            new_apt = Apartment(**data)
            new_apt.link = new_link
            session.add(new_apt)

            try:
                session.commit()
                num_new_links += 1
            except IntegrityError as e:
                print(e)
                print(link)
                session.rollback()
            except Exception as e:
                print(e)
                session.rollback()
            finally:
                # Continue with a fresh session, closing the old one first
                # so it is not leaked.
                session.close()
                session = DBSession()
    finally:
        session.close()

    print('Done!')
    print('%s new links added.' % num_new_links)