コード例 #1
0
    def modify_tt_time(self, uid: str, amount: str):
        """
        Shift every time held by the TT stored under ``uid`` by ``amount``.

        The origin, destination and (when present) entry times are shifted,
        as are the 'dep' / 'arr' times of each location. The description is
        rebuilt from the new origin time followed by the old description
        minus its first four characters. Nothing happens when no TT with
        that uid is stored.

        :param uid: uid of the TT to modify.
        :param amount: duration string handed to ``isodate.parse_duration``;
            a leading '-' shifts the times backwards.
        """
        doc_id = generate_id_from_uid(uid)
        if not self.db.contains(doc_id=doc_id):
            return

        # Use total_seconds() rather than .seconds: .seconds only carries the
        # sub-day component of a timedelta, so a shift such as 'P1D' or
        # 'P1DT1H' would silently lose its whole days.
        if amount[0] == '-':
            amount_to_add = -int(
                isodate.parse_duration(amount[1:]).total_seconds())
        else:
            amount_to_add = int(
                isodate.parse_duration(amount).total_seconds())

        def shifted(time_value):
            # Round-trip through seconds so the offset can be applied.
            return self.convert_sec_to_time(
                self.convert_time_to_secs(time_value) + amount_to_add)

        tt = self.db.get(doc_id=doc_id)

        tt['origin_time'] = shifted(tt['origin_time'])
        tt['destination_time'] = shifted(tt['destination_time'])
        # NOTE(review): assumes the description starts with a 4-character
        # origin time - confirm against the description format.
        tt['description'] = tt['origin_time'] + tt['description'][4:]

        if 'entry_time' in tt:
            tt['entry_time'] = shifted(tt['entry_time'])

        for loc in tt['locations']:
            if 'dep' in loc:
                loc['dep'] = shifted(loc['dep'])
            if 'arr' in loc:
                loc['arr'] = shifted(loc['arr'])

        # Replace the stored document with the modified copy.
        self.db.remove(doc_ids=[doc_id])
        self.db.insert(table.Document(tt, doc_id=doc_id))
コード例 #2
0
 def add_tt_if_not_present(self, tt: dict):
     """
     Insert a TT into the TT DB unless one with the same uid is stored.

     The document id is derived from ``tt['uid']``; an existing document
     under that id is left untouched.
     :param tt: json TT to add.
     """
     key = generate_id_from_uid(tt['uid'])
     if self.db.contains(doc_id=key):
         # Already stored: keep the existing record.
         return
     self.db.insert(table.Document(tt, doc_id=key))
コード例 #3
0
 def add_seed_groups(self, seed_groups: list):
     """
     Store the seed groups in the main header DB, replacing any present.

     The seed groups are kept under document id 3, wrapped in a dict with
     the single key 'seed_groups'.
     :param seed_groups: list of seed groups to add.
     """
     seed_doc_id = 3
     payload = {'seed_groups': seed_groups}
     already_stored = self.db.contains(doc_id=seed_doc_id)
     if already_stored:
         # Drop the old document first so the insert cannot clash.
         self.db.remove(doc_ids=[seed_doc_id])
     self.db.insert(table.Document(payload, doc_id=seed_doc_id))
コード例 #4
0
 def add_categories_map(self, cat_map: dict):
     """
     Store the xml train categories map in the main header DB.

     The map is kept under document id 2, wrapped in a dict with the single
     key 'categories_map'; a document already stored there is replaced.
     :param cat_map: map of train categories to add.
     """
     categories_doc_id = 2
     payload = {'categories_map': cat_map}
     already_stored = self.db.contains(doc_id=categories_doc_id)
     if already_stored:
         # Drop the old document first so the insert cannot clash.
         self.db.remove(doc_ids=[categories_doc_id])
     self.db.insert(table.Document(payload, doc_id=categories_doc_id))
コード例 #5
0
 def add_header(self, header: dict):
     """
     Store the TT header in the main header DB, replacing one if present.

     The header is kept under document id 1.
     :param header: Header to add.
     """
     header_doc_id = 1
     already_stored = self.db.contains(doc_id=header_doc_id)
     if already_stored:
         # Drop the old document first so the insert cannot clash.
         self.db.remove(doc_ids=[header_doc_id])
     self.db.insert(table.Document(header, doc_id=header_doc_id))
コード例 #6
0
 def add_tt(self, tt: dict):
     """
     Store a TT in the TT DB, replacing any TT that has the same uid.

     The document id is derived from ``tt['uid']``.
     :param tt: json TT to add.
     """
     key = generate_id_from_uid(tt['uid'])
     already_stored = self.db.contains(doc_id=key)
     if already_stored:
         # Overwrite semantics: remove the old record before inserting.
         self.db.remove(doc_ids=[key])
     replacement = table.Document(tt, doc_id=key)
     self.db.insert(replacement)
コード例 #7
0
    def add_rule_if_not_present(self, rule: dict):
        """
        Insert a Rule into the Rules DB unless its id is already stored.

        The document id comes from ``generate_rule_id(rule)``; an existing
        document under that id is left untouched.
        :param rule: Rule to add.
        """
        key = generate_rule_id(rule)
        if self.db.contains(doc_id=key):
            # Already stored: keep the existing rule.
            return
        self.db.insert(table.Document(rule, doc_id=key))
コード例 #8
0
    def add_rule(self, rule: dict):
        """
        Store a Rule in the Rules DB, replacing any Rule with the same id.

        The document id comes from ``generate_rule_id(rule)``.
        :param rule: Rule to add.
        """
        key = generate_rule_id(rule)
        already_stored = self.db.contains(doc_id=key)
        if already_stored:
            # Overwrite semantics: remove the old rule before inserting.
            self.db.remove(doc_ids=[key])
        replacement = table.Document(rule, doc_id=key)
        self.db.insert(replacement)
コード例 #9
0
    def update_destination_for_uids(self, uids: list, destination: str):
        """
        Set a new destination name on each stored TT whose uid is listed.

        Uids with no stored TT are skipped. The description becomes the text
        before the first '- ' of the old description followed by the new
        destination.
        :param uids: uids of the TTs to update.
        :param destination: new destination name.
        """
        for uid in uids:
            key = generate_id_from_uid(uid)
            if not self.db.contains(doc_id=key):
                continue

            record = self.db.get(doc_id=key)
            record['destination_name'] = destination
            # NOTE(review): split('- ') consumes the separator and it is not
            # re-added, and anything after it is dropped - confirm this is
            # the intended description format.
            prefix = record['description'].split('- ')[0]
            record['description'] = prefix + destination

            # Replace the stored document with the modified copy.
            self.db.remove(doc_ids=[key])
            self.db.insert(table.Document(record, doc_id=key))
コード例 #10
0
 def update_location_for_uids(self, uids: list, location_to_update: str,
                              keys_to_update: dict):
     """
     Apply key/value updates to matching locations of the listed TTs.

     Uids with no stored TT are skipped. A location matches when
     ``location_to_update`` is contained in its 'location' value; each
     matching location receives every entry of ``keys_to_update``, with
     the key converted to ``str``.
     :param uids: uids of the TTs to update.
     :param location_to_update: text to look for in each location name.
     :param keys_to_update: entries to write into matching locations.
     """
     for uid in uids:
         key = generate_id_from_uid(uid)
         if not self.db.contains(doc_id=key):
             continue

         record = self.db.get(doc_id=key)
         for stop in record['locations']:
             if location_to_update not in stop['location']:
                 continue
             for field, value in keys_to_update.items():
                 stop[str(field)] = value

         # Replace the stored document with the modified copy.
         self.db.remove(doc_ids=[key])
         self.db.insert(table.Document(record, doc_id=key))
コード例 #11
0
    def update_category_for_uids(self, uids: list, category: str):
        """
        Set a new category on each stored TT whose uid is listed.

        Uids with no stored TT are skipped. The description is rebuilt from
        the TT's origin time, origin name, destination name and the new
        category.
        :param uids: uids of the TTs to update.
        :param category: new category value.
        """
        for uid in uids:
            key = generate_id_from_uid(uid)
            if not self.db.contains(doc_id=key):
                continue

            record = self.db.get(doc_id=key)
            record['category'] = category

            description_parts = (
                record['origin_time'],
                record['origin_name'],
                record['destination_name'],
                record['category'],
            )
            record['description'] = '{} {} - {} {}'.format(*description_parts)

            # Replace the stored document with the modified copy.
            self.db.remove(doc_ids=[key])
            self.db.insert(table.Document(record, doc_id=key))
コード例 #12
0
 def put_tt_by_uid(self, uid: str, tt: dict) -> bool:
     """
     Replace the TT stored under the given uid.

     Nothing is written when no TT is stored under that uid.
     :param uid: uid of the TT.
     :param tt: TT to replace with.
     :return: True if a stored TT was replaced, False if there was none.
     """
     key = generate_id_from_uid(uid)
     if not self.db.contains(doc_id=key):
         return False

     # Swap the old document for the new one under the same id.
     self.db.remove(doc_ids=[key])
     self.db.insert(table.Document(tt, doc_id=key))
     return True
コード例 #13
0
    def update_origin_for_uids(self, uids: list, origin: str,
                               origin_time: str):
        """
        Set a new origin (and optionally origin time) on the listed TTs.

        Uids with no stored TT are skipped. When ``origin_time`` is None the
        TT keeps its current origin time. The description becomes
        '<origin time> <origin> -' followed by the second '-'-separated
        piece of the old description.
        :param uids: uids of the TTs to update.
        :param origin: new origin name.
        :param origin_time: new origin time, or None to keep the stored one.
        """
        for uid in uids:
            key = generate_id_from_uid(uid)
            if not self.db.contains(doc_id=key):
                continue

            record = self.db.get(doc_id=key)
            record['origin_name'] = origin
            if origin_time is not None:
                record['origin_time'] = origin_time

            # After the optional overwrite above, the stored origin time is
            # the one to show in either case.
            head = '{} {} -'.format(record['origin_time'], origin)
            tail = record['description'].split('-')[1]
            record['description'] = head + tail

            # Replace the stored document with the modified copy.
            self.db.remove(doc_ids=[key])
            self.db.insert(table.Document(record, doc_id=key))
コード例 #14
0
# fs = FinnScraper("https://www.finn.no/realestate/homes/search.html?page={}")

# Bind the name up front: if get_ad_codes() raises, the assignment inside the
# try never happens, and the failure handler used to hit a NameError on
# finn_codes before it could close the DB and the driver.
finn_codes = []
try:
    finn_codes = fs.get_ad_codes(npages=25, verbose=1)
except Exception as e:
    send_message(f'Error: {str(e)}\n{str(e.__traceback__.tb_frame)}')
    # No ad codes were collected, so there is nothing to process: release
    # the resources and stop.
    db.close()
    fs.close_driver()
    exit()

ad_counter = 0
for finn_code in tqdm(finn_codes):
    # Skip ads that are already stored under their finn code.
    if db.contains(doc_id=finn_code):
        continue

    try:
        ad_dict = fs.parse_housing_page(finn_code)
        ad_dict['scraping_date'] = todays_date
    except Exception as e:
        # Best effort: report the failing ad and carry on with the next one.
        send_message(f'Error: {str(e)}\n{str(e.__traceback__.tb_frame)}')
        continue

    db.insert(table.Document(ad_dict, doc_id=finn_code))
    ad_counter += 1

send_message(f'Added {ad_counter} new house data\nTotal Data: {len(db)}')

db.close()
fs.close_driver()