Example no. 1
0
def main():
    """Fan the bbb_url -> google_place_id backfill out to a worker pool.

    Reads every distinct Company.bbb_url from SQL, looks each one up in the
    Mongo pro collection, and enqueues a (url, column, value, logger) tuple
    for the SqlUpdateWorker threads to write back to SQL.
    """
    ts = time()
    with session_scope() as session:
        bbb_urls = [x[0] for x in session.query(Company.bbb_url).distinct()]

    queue = Queue()

    # Daemon workers: they die with the main thread after queue.join().
    for _ in range(8):
        worker = SqlUpdateWorker(queue)
        worker.daemon = True
        worker.start()

    for url in bbb_urls:
        # BUG FIX: g_id was only assigned inside the `if pro:` branch, so a
        # URL missing from Mongo either raised NameError (on the first miss)
        # or silently enqueued the previous URL's id. Reset it every pass.
        g_id = None
        pro = pro_coll.find_one({"bbb_url": url})
        if pro:
            g_id = pro["google_id"]
            # Matcher sentinel strings mean "no usable id" — store NULL.
            if g_id in ("NoPhoneMatch", "NoAddressMatch"):
                g_id = None
        else:
            logger.warning(f"NOT IN MONGO: {url}")
        queue.put((url, "google_place_id", g_id, logger))

    queue.join()

    print(f"Time: {time() - ts}")
    def run(self):
        """Worker loop: resolve each queued bbb_url to its SQL Company id
        and stamp that id onto the matching Mongo document.

        Runs forever as a daemon thread; task_done() is always called so the
        producer's queue.join() can complete even when an item fails.
        """
        while True:
            try:
                url, column, value = self.queue.get()
                with session_scope() as session:
                    sql_id = (session.query(
                        Company.id).filter(Company.bbb_url == url).scalar())
                # BUG FIX: the original printed and stored an undefined name
                # `pro_id`; the id looked up above is the intended value.
                print(sql_id)
                # sys.argv[1] names the target environment, so the Mongo
                # field written is "<env>_sql_id" — TODO confirm with caller.
                pro_coll.update_one(
                    {"bbb_url": url},
                    {"$set": {
                        f"{sys.argv[1]}_sql_id": sql_id
                    }})
            finally:
                self.queue.task_done()
    def run(self):
        """Worker loop: apply a single-column update to every Company row
        matching a queued bbb_url.

        Queue items are (url, column, value, logger) tuples. task_done() is
        always called so the producer's queue.join() can complete.
        """
        while True:
            try:
                url, column, value, logger = self.queue.get()

                with session_scope(logger=logger, mode="update") as session:
                    matches = list(
                        session.query(Company).filter(Company.bbb_url == url))
                    if not matches:
                        logger.info(f"{url} is not in prod db")
                    else:
                        # The original's len==1 and len>=2 branches performed
                        # the identical per-row log+update; folded into one
                        # loop. A trailing redundant `continue` was removed.
                        for c in matches:
                            logger.info(f"ID: {c.id} has updated {column}")
                            session.query(Company).filter(
                                Company.id == c.id).update({column: value})
            finally:
                self.queue.task_done()
        # NOTE(review): this span starts mid-body of a function/loop not
        # visible here — merge_pro, columns, dlicates, p_ind, keep_id,
        # sql_ids, mlogger and count are all defined outside this view.
        # Interactive retry: "y" re-runs the merge; any other answer skips it.
        try_again = input("need another try?")
        if try_again == "y":
            try:
                log_message = merge_pro(log_message, columns, dlicates, p_ind,
                                        keep_id, sql_ids)
            except:
                raise
        else:
            pass

        ## pop keep id before using sql_ids to delete duplicates
        # Removes keep_id from sql_ids in place so only duplicates remain.
        _ = sql_ids.pop(sql_ids.index(keep_id))

        log_message = log_message + f"\tDELETING IDS - {sql_ids}"
        # mlogger.warning(log_message)

        ### delete pros not used
        # One session for all deletions; each remaining id is looked up and
        # its first matching Company row deleted.
        with session_scope(logger=mlogger) as session:
            for i in sql_ids:
                log_message = log_message + f"\tDELETING - {i}"
                del_pro = session.query(Company).filter(
                    Company.id == i).first()
                session.delete(del_pro)

        # log operation and clear the terminal
        mlogger.warning(log_message)
        clear()

    # Loop counter maintained by the enclosing (unseen) loop.
    count += 1
            "in_production": True,
            "is_accredited": True
        },
        {
            "_id": False,
            "bbb_url": True,
            "is_accredited": True
        },
    )
]

##5988 unique bbb_urls in production according to mongo db
# check prod in heroku query tool!

# Mark each accredited pro's Company row; duplicates are logged, not updated.
for p in accredited_pros:
    with session_scope() as session:
        # All Company rows sharing this bbb_url (duplicates are possible).
        matches = list(
            session.query(Company).filter(
                Company.bbb_url == p["bbb_url"]).all())

        if not matches:
            logger.info(
                f"{p['bbb_url']} is not in prod db or has accreditation data")
        elif len(matches) == 1:
            logger.info(f"{p['bbb_url']} has updated accreditation data!")
            matches[0].is_accredited = True
        else:
            # Two or more duplicate rows: flag the anomaly instead of writing.
            logger.info(f"{p['bbb_url']} has updated accreditation data!")
            logger.warning(
                f"{p['bbb_url']} has {len(matches)} duplicate rows in the db")
Example no. 6
0
# Root logger at DEBUG so all module loggers propagate here.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# map company table
# NOTE(review): Base.classes lookups suggest SQLAlchemy automap reflection
# of the production schema — confirm where Base is prepared.
Company = Base.classes["companies_company"]
Service = Base.classes["commons_servicetype"]
CompanyService = Base.classes["companies_companyservicetype_service_type"]

## connect to pymongo
# No host args: connects to the default localhost:27017 instance.
client = MongoClient()
db = client["ProData"]
pro_coll = db["ProDataCollection"]

# Build a lookup from a normalized service name to its SQL service_id.
# `services` keeps the raw (name, id) rows; `service_dict` keys them by the
# cleaned name so alpha-version names can be matched against the db.
with session_scope(logger) as session:
    rows = session.query(Service.name, Service.id)
    services = [row._asdict() for row in rows]

service_dict = {}
for svc in services:
    service_dict[clean_special(clean_str(svc["name"]))] = svc["id"]

# Pre-compiled log-scraping patterns. FIX: the originals were plain strings
# containing \s and \d — invalid escape sequences that Python only tolerates
# with a DeprecationWarning. Raw strings make the patterns explicit; \d{5}
# is the same regex as the original \d\d\d\d\d.
sqlre = re.compile(r"(sql_id:)\s\d{5}")           # e.g. "sql_id: 12345"
mongore = re.compile(r"(pro_mongo_id\s:)\s\w*")   # e.g. "pro_mongo_id : abc1"
namere = re.compile(r"('name':)\s'(.*?)'")        # e.g. "'name': 'Acme'"

# Scrape previously-uploaded mongo ids out of the load log.
uploaded_ids = []
# NOTE(review): `names` is never filled in this view — presumably populated
# by a namere branch further down the (unseen) loop body; confirm.
names = []
with open("./log_files/staging_accredited_load.log") as file:
    for row in file.readlines():
        if mongore.findall(row):
            # Text after the first colon, e.g. "pro_mongo_id : abc" -> "abc".
            uploaded_ids.append(row.split(":")[1].strip())