コード例 #1
0
 def new_links_update():
     """Match newly collected flats against user filters and send the hits.

     Does nothing when ``GlobalParser.flats_ids`` is empty. Otherwise the
     collected ids are de-duplicated, the corresponding flats that have a
     metro station are loaded from the flats DB, and their prices are
     normalised through ``GlobalParser.fix_price``. For every user that has
     both ``max_price`` and ``metro_stations`` set, the flats whose
     lower-cased station name is in the user's stations and whose price does
     not exceed the user's limit are passed, as a list of ids, to
     ``GlobalParser.offers_send_function``.
     """
     if not GlobalParser.flats_ids:
         return
     logger.info("Filtering offers and sending them to queue")

     # Drop duplicate ids before asking the DB for the flats
     unique_ids = list(set(GlobalParser.flats_ids))
     flat_query = {
         'id': {'$in': unique_ids},
         'location.metro': {'$exists': True},
     }
     flat_fields = {'id': 1, 'location.metro.name': 1, 'price': 1}
     candidates = list(Databases.get_flats_db().find(flat_query, flat_fields))
     for candidate in candidates:
         candidate['price'] = GlobalParser.fix_price(candidate)

     # Only users with a complete filter (price limit + stations) take part
     user_query = {
         'max_price': {'$exists': True},
         'metro_stations': {'$exists': True},
     }
     user_fields = {'id': 1, 'max_price': 1, 'metro_stations': 1}
     subscribers = Databases.get_users_db().find(user_query, user_fields)

     for subscriber in subscribers:
         uid = subscriber['id']
         price_limit = subscriber['max_price']
         wanted_stations = set(subscriber['metro_stations'])

         # Station filter first, then the price filter, for each flat
         matches = [
             candidate for candidate in candidates
             if candidate['location']['metro']['name'].lower() in wanted_stations
             and candidate['price'] <= price_limit
         ]
         if not matches:
             continue

         message = {
             'uid': uid,
             'offers': [match['id'] for match in matches],
         }
         logger.debug("Sending {} offers to user {}".format(len(matches), uid))
         GlobalParser.offers_send_function(message)
コード例 #2
0
    def check_suspicious():
        """Mark the cheapest not-yet-validated flats of each station as suspicious.

        Loads every flat that has a metro station and whose
        ``seen_by_suspicious_validator`` flag is False, normalises its price
        with ``SuspiciousChecker.fix_price`` and groups the flats by
        lower-cased station name. In each group the cheapest
        ``ceil(len(group) * config.suspicious_fraction)`` flats get
        ``suspicious = True`` in the DB. Every loaded flat is then marked as
        seen by the validator.
        """
        logger.info("Checking suspicious")
        db = Databases.get_flats_db()
        logger.debug("Getting target flats")
        target_flats = list(db.find({'seen_by_suspicious_validator': False,
                                     'location.metro': {'$exists': True}},
                                    {'id': 1,
                                     'price': 1,
                                     'location.metro.name': 1}))
        logger.debug("Got {}".format(len(target_flats)))
        for flat in target_flats:
            flat['price'] = SuspiciousChecker.fix_price(flat)
            flat['metro'] = flat['location']['metro']['name']
            del flat['location']
        logger.debug("Fixed names")

        logger.debug("Getting suspicious flats")
        # Group in a single pass instead of rescanning all flats per station.
        # Insertion order inside each group follows target_flats, so the
        # stable sort below picks the same flats as a per-station rescan.
        flats_by_station = {}
        for flat in target_flats:
            flats_by_station.setdefault(flat['metro'].lower(), []).append(flat)

        suspicious_flats = []
        for station_flats in flats_by_station.values():
            suspicious_num = math.ceil(len(station_flats) * config.suspicious_fraction)
            station_flats.sort(key=lambda x: x['price'])
            suspicious_flats.extend(station_flats[:suspicious_num])

        target_flats_ids = [flat['id'] for flat in target_flats]
        suspicious_flats_ids = [flat['id'] for flat in suspicious_flats]
        logger.debug("Pushing to DB")
        db.update_many({'id': {'$in': target_flats_ids}}, {'$set': {'seen_by_suspicious_validator': True}})
        db.update_many({'id': {'$in': suspicious_flats_ids}}, {'$set': {'suspicious': True}})
コード例 #3
0
def get_new_offers(url, time=config.cian_default_timeout):
    """Yield each distinct offer parsed from *url* after upserting it into the flats DB.

    Offers come from ``get_offers(url, time)``. The first time an offer id is
    met, the offer gets ``seen_by_suspicious_validator = False`` and
    ``suspicious = False``, replaces (or creates) the DB document with the
    same ``id`` and is yielded. Later offers with an already-met id are not
    stored or yielded; an error is logged if their content differs from the
    first occurrence.

    :param url: URL passed through to ``get_offers``.
    :param time: second argument passed through to ``get_offers``.
    """
    db = Databases.get_flats_db()
    # Snapshot of every handled offer as it was parsed, keyed by offer id.
    # The snapshot is taken BEFORE the validator flags are added; otherwise
    # a later raw duplicate could never compare equal to the stored dict and
    # every duplicate would be reported as "different".
    seen = {}
    for offer in get_offers(url, time):
        offer_id = offer['id']
        if offer_id in seen:
            previous = seen[offer_id]
            if previous != offer:
                logger.error("Different dicts: {}\n{}".format(offer, previous))
            continue
        seen[offer_id] = dict(offer)
        offer['seen_by_suspicious_validator'] = False
        offer['suspicious'] = False
        db.find_one_and_replace({'id': offer_id}, offer, upsert=True)
        yield offer
    logger.info("Totally parsed {} real offers.".format(len(seen)))
コード例 #4
0
def get_new_offers(url, time=config.cian_default_timeout):
    """Yield each distinct offer parsed from *url* after upserting it into the flats DB.

    Offers come from ``get_offers(url, time)``. The first time an offer id is
    met, the offer gets ``seen_by_suspicious_validator = False`` and
    ``suspicious = False``, replaces (or creates) the DB document with the
    same ``id`` and is yielded. Later offers with an already-met id are not
    stored or yielded; an error is logged if their content differs from the
    first occurrence.

    :param url: URL passed through to ``get_offers``.
    :param time: second argument passed through to ``get_offers``.
    """
    db = Databases.get_flats_db()
    # Snapshot of every handled offer as it was parsed, keyed by offer id.
    # The snapshot is taken BEFORE the validator flags are added; otherwise
    # a later raw duplicate could never compare equal to the stored dict and
    # every duplicate would be reported as "different".
    seen = {}
    for offer in get_offers(url, time):
        offer_id = offer['id']
        if offer_id in seen:
            previous = seen[offer_id]
            if previous != offer:
                logger.error("Different dicts: {}\n{}".format(offer, previous))
            continue
        seen[offer_id] = dict(offer)
        offer['seen_by_suspicious_validator'] = False
        offer['suspicious'] = False
        db.find_one_and_replace({'id': offer_id}, offer, upsert=True)
        yield offer
    logger.info("Totally parsed {} real offers.".format(len(seen)))
コード例 #5
0
class InvitesManager:
    """Static helpers for creating, listing and redeeming invite codes.

    Invites are stored as ``{'id': <code>}`` documents in the collection
    returned by ``Databases.get_invites_db()``.
    """

    # Collection handle, obtained once when the class body is executed.
    db = Databases.get_invites_db()

    @staticmethod
    def generate_random_str(n):
        """Return *n* random characters from uppercase ASCII letters and digits."""
        # One OS-backed generator for the whole string instead of one per char.
        rng = random.SystemRandom()
        alphabet = string.ascii_uppercase + string.digits
        return ''.join(rng.choice(alphabet) for _ in range(n))

    @staticmethod
    def pull_invite(invite):
        """Redeem *invite*: remove it from the DB.

        :return: True if the invite existed (and is now deleted), else False.
        """
        # A single find-and-delete avoids the window between a separate
        # lookup and delete in which the same invite could be redeemed twice.
        removed = InvitesManager.db.find_one_and_delete({'id': invite})
        return removed is not None

    @staticmethod
    def insert_invite(invite):
        """Store a single invite code."""
        InvitesManager.db.insert_one({'id': invite})

    @staticmethod
    def insert_random_invites(count, len):
        """Generate and store *count* random invites of *len* characters each.

        The ``len`` parameter shadows the builtin; the name is kept so that
        existing keyword callers keep working.
        """
        for _ in range(count):
            InvitesManager.insert_invite(
                InvitesManager.generate_random_str(len))

    @staticmethod
    def insert_many_invites(invites):
        """Store every code from the iterable *invites* in one batch."""
        documents = [{'id': invite} for invite in invites]
        if not documents:
            # pymongo's insert_many rejects an empty document list.
            return
        InvitesManager.db.insert_many(documents)

    @staticmethod
    def get_invites():
        """Return a cursor over all stored invite documents."""
        return InvitesManager.db.find()

    @staticmethod
    def get_invite():
        """Return one stored invite document, or None when there is none."""
        return InvitesManager.db.find_one()

    @staticmethod
    def get_invites_list(count=config.default_invites_count):
        """Return up to *count* invite codes, generating new ones if needed.

        When fewer than *count* invites are stored, the missing number is
        generated with length ``config.invite_length`` and the list is
        re-read from the DB.
        """
        invites = [invite['id'] for invite in InvitesManager.get_invites()]
        # Top up relative to the REQUESTED count, not the configured default.
        missing = count - len(invites)
        if missing > 0:
            InvitesManager.insert_random_invites(missing, config.invite_length)
            invites = [invite['id'] for invite in InvitesManager.get_invites()]
        return invites[:count]
コード例 #6
0
    # Determine the number of pages from the number of offers
    if num_of_offers == 0:
        return
    raw_offers = get_raw_offers(page_bs)
    yield from (parse_raw_offer(offer) for offer in raw_offers)
    num_of_offers -= len(raw_offers)
    i = 2
    while num_of_offers > 0:
        url = change_params(raw_url, totime=url_time, p=i)
        raw_offers = get_raw_offers(safe_request(url))
        logger.debug("Parsing {} page".format(i))
        yield from (parse_raw_offer(offer) for offer in raw_offers)
        num_of_offers -= len(raw_offers)
        i += 1


if __name__ == "__main__":
    import argparse

    # Command-line entry point: parse the given URL and store every offer.
    parser = argparse.ArgumentParser(description='CIAN parser by URL')
    parser.add_argument('url', help='URL to parse', type=str)
    parser.add_argument(
        '-t', '--time',
        default=360000000000000000000,
        help='Set time of last parsing',
        type=int,
    )
    args = parser.parse_args()

    db = Databases.get_flats_db()
    # Each item is unpacked into (info, info_id) and written under its id.
    for info, info_id in get_offers(args.url, args.time):
        write_to_database(info_id, info, db)
コード例 #7
0
 def init():
     """Attach the users collection from ``Databases`` to ``User.db``."""
     users_collection = Databases.get_users_db()
     User.db = users_collection
コード例 #8
0
 def init():
     """Attach the users collection from ``Databases`` to ``User.db``."""
     users_collection = Databases.get_users_db()
     User.db = users_collection
コード例 #9
0
    # Get the number of offers
    num_of_offers = get_count_of_offers(page_bs)
    logger.debug("Parsing {} offers".format(num_of_offers))
    # Determine the number of pages from the number of offers
    if num_of_offers == 0:
        return
    raw_offers = get_raw_offers(page_bs)
    yield from (parse_raw_offer(offer) for offer in raw_offers)
    num_of_offers -= len(raw_offers)
    i = 2
    while num_of_offers > 0:
        url = change_params(raw_url, totime=url_time, p=i)
        raw_offers = get_raw_offers(safe_request(url))
        logger.debug("Parsing {} page".format(i))
        yield from (parse_raw_offer(offer) for offer in raw_offers)
        num_of_offers -= len(raw_offers)
        i += 1


if __name__ == "__main__":
    import argparse

    # Command-line entry point: parse the given URL and store every offer.
    parser = argparse.ArgumentParser(description='CIAN parser by URL')
    parser.add_argument('url', help='URL to parse', type=str)
    parser.add_argument(
        '-t', '--time',
        default=360000000000000000000,
        help='Set time of last parsing',
        type=int,
    )
    args = parser.parse_args()

    db = Databases.get_flats_db()
    # Each item is unpacked into (info, info_id) and written under its id.
    for info, info_id in get_offers(args.url, args.time):
        write_to_database(info_id, info, db)