Beispiel #1
0
    def __init__(self, highonly=False, nohigh=False):
        """Build the jump graph, optionally restricted by security filter.

        `highonly` selects high-sec-only jump data, `nohigh` excludes
        high-sec; with neither flag set, all jumps are loaded.
        """
        if highonly:
            query = g_highonly
        elif nohigh:
            query = g_nohigh
        else:
            query = g_all

        self.g = nx.Graph()
        self.g.add_edges_from(get_all(query, {}))
Beispiel #2
0
    def __init__(self, highonly=False, nohigh=False):
        """Construct the system graph from jump data.

        Exactly one data set is loaded: high-sec-only when `highonly`,
        non-high-sec when `nohigh`, otherwise everything.
        """
        self.g = nx.Graph()

        query = g_all
        if highonly:
            query = g_highonly
        elif nohigh:
            query = g_nohigh

        jumps = get_all(query, {})
        self.g.add_edges_from(jumps)
Beispiel #3
0
    def __init__(self, names=False, categories=(0,), rigs=True, detail=-1):
        """Load the set of inventable items.

        `categories` is a sequence of category IDs; (0,) is the default
        sentinel and expands to all categories (6, 7, 8, 18, 22).
        `rigs=False` excludes rigs. If `detail` is set it must be an itemid
        and only that item's information is returned.
        """
        # FIX: the default was a mutable list ([0]); a tuple sentinel avoids
        # the shared-mutable-default pitfall and behaves identically.
        if len(categories) == 1 and categories[0] == 0:
            categories = [6, 7, 8, 18, 22]

        if detail != -1:
            # Detail mode: a single pseudo-row holding just the requested id.
            self.inventable_items = [[detail]]
        elif not rigs:
            self.inventable_items = get_all(g_inventable_no_rigs['sql'] % ','.join(['%s'] * len(categories)), tuple(categories))
        else:
            self.inventable_items = get_all(g_inventable_categories['sql'] % ','.join(['%s'] * len(categories)), tuple(categories))
        self.items = dict()
        self.names = names
 def get_all_orders(self):
     """Fetch every order row and decode the JSON-encoded delivery hours."""
     columns = ["id", "weight", "region", "delivery_hours", "assigned", "completed"]
     self.data = db.get_all("orders", columns)
     for entry in self.data:
         entry["delivery_hours"] = json.loads(entry["delivery_hours"])
     self.to_internal_value()
Beispiel #5
0
    def do_GET(self):
        """Serve the index page at '/'; any other path gets a 404."""
        parts = urlsplit(self.path)

        # This handler only knows the index page.
        if parts.path != '/':
            self.send_error(404)
            return

        # Number rows by insertion order, then render newest-first.
        table_rows = []
        numbered = list(enumerate(db.get_all(reversed=False), 1))
        for idx, reading in reversed(numbered):
            table_rows.append(f'''
            <tr>
                <td>{idx}</td>
                <td>{reading['date']}</td>
                <td>{str(reading['cold']).zfill(5)}</td>
                <td>{str(reading['hot']).zfill(5)}</td>
            </tr>
            ''')

        body = HTML_TEMPLATE_INDEX.replace('{{ table_rows }}', ''.join(table_rows))

        self.send_response(200)
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Connection', 'close')
        self.end_headers()
        self.wfile.write(body.encode('utf-8'))
Beispiel #6
0
def request_things():
    # Handle an incoming "request things" form submission for an existing disaster.
    uuid = request.form.get('uuid', False)
    category = request.form.get('category', False)
    item = request.form.get('item', False)
    email = request.form.get('email', False)
    try:
        image = request.files["image"]
    except KeyError:
        # Image is optional; False marks "no image supplied".
        image = False
    # Prefer the reverse-proxy-forwarded client address when present.
    ip = request.environ.get('HTTP_X_REAL_IP', request.remote_addr)
    print(ip)
    if uuid and category and item and email:
        location = None
        try:
            location = get_all()[uuid]['location']
        except KeyError:
            return "Error: disaster must be created before you can request things for it"
        # NOTE(review): `if ip or ...` is a tautology -- `ip` is virtually always
        # truthy, so the whitelist comparisons are dead code and the 527 branch
        # below is unreachable. `location` is fetched but never used, which
        # suggests a geo-IP-vs-location check was intended here. TODO: confirm
        # the intended verification and fix the condition.
        if ip or ip == "76.112.42.21" or ip == "192.168.86.1" or ip == "192.168.86.41" or ip == "192.168.1.80":  # allow localhost and my IP to bypass IP verification
            request_id = add_request(uuid, category, item, email, image)
            # NLP categorization runs in the background so the request returns fast.
            thread = Thread(target=nlp,
                            kwargs={
                                'name': item,
                                'category': category,
                                'id': uuid,
                                'request_id': request_id
                            })
            thread.start()
            return 'Added'
        else:
            return 'Error: IP address does not appear to be from the location of the disaster, please ensure you are not using a VPN', 527

    else:
        return "Error: not all data was received"
Beispiel #7
0
def post_index():
    """Log the user in when the submitted credentials match a stored user."""
    credentials = {
        'username': request.form.get('username'),
        'password': request.form.get('password'),
    }
    if credentials in get_all():
        session['username'] = credentials['username']
        return 'đã đăng nhập thành công'
    return redirect(url_for('get_index'))
Beispiel #8
0
def get_food():
    """Render the food listing for logged-in users; anyone else goes to login."""
    if "username" not in session:
        return redirect(url_for("login"))
    return render_template('food.html', data=get_all())
Beispiel #9
0
def main():
    """Render the index page with a freshly generated per-session nonce."""
    nonce = random_string(32)
    session['nonce'] = nonce

    context = {
        'data': db.get_all(),
        'zone': config.zone,
        'nonce': nonce,
    }
    return render_template("index.html", **context)
Beispiel #10
0
def reminder():
    """Send every user the schedule reminder for the lesson starting now.

    NOTE: raises KeyError when invoked at a time not present in `lesson`;
    the caller's scheduler is expected to fire exactly at those times.
    """
    lesson = {'07:50': 0, '09:25': 1, '10:55': 2, '12:55': 3, '14:25': 4}
    # FIX: resolve the current slot once, outside the loop -- the original
    # re-read the clock per user, so the minute could tick over mid-loop and
    # different users would get different lessons.
    slot = lesson[datetime.datetime.now().strftime('%H:%M')]

    for user in db.get_all():
        schedule_rem = day_reminder(read_schedule(user))[slot]
        if schedule_rem == '':
            schedule_rem = f"{slot} - Пари немає"
        bot.send_message(user[0], schedule_rem)
Beispiel #11
0
def get_all(conn):
    """Fetch all plant rows, attaching each plant's related otherInfo rows.

    Returns the standard success envelope, or the DAO error handler's
    result when any database call raises.
    """
    try:
        response = db.get_all(conn, db.PLANTS_TABLE)
        # Iterate the rows directly instead of indexing by range(len(...)).
        for plant in response:
            plant['otherInfo'] = db.get_by_foreign_key(
                conn, db.OTHER_INFO_TABLE, 'plant_id', plant['id'])
    except Exception as e:
        return dao_utils.handle_db_error(e)
    return endpoint_utils.handle_get_finished_successfully(response)
Beispiel #12
0
 def note_list():
     """Return all notes as JSON, or 401 when authorization fails."""
     access = check_authorization(request)
     if not access['valid']:
         return jsonify({'message': access['message']}), 401
     conn = db.connect_database()
     return jsonify(db.get_all(conn)), 200
 def _get_categories(self) -> List[Category]:
     """Fetch every category row from the database and resolve its aliases."""
     columns = ["category_codename", "category_name",
                "is_base_expense", "aliases"]
     rows = db.get_all("categories", columns)
     return self._fill_aliases(rows)
Beispiel #14
0
def post_food():
    """Add one dish from the submitted form, then re-render the food page."""
    form = request.form
    add_food(form.get('name'), form.get('price'), form.get('image_url'))

    return render_template('food.html', data=get_all())
Beispiel #15
0
def food(name: str):
    """Return one food looked up by the `name` query param, or all foods.

    Responds 200 with the item, 404 when the name is unknown, or 200 with
    the full collection when no name is given.
    """
    # The route argument is ignored; the query string wins (original behavior).
    name = request.args.get('name')
    if name:
        item = db.get(collection="foods", resource_id=name)
        if item:
            return jsonify(item), 200
        # FIX: the original passed a SET literal {"error:", f"..."} to
        # jsonify, which is not JSON-serializable; it must be a dict.
        return jsonify({"error": f"{name} not found in database"}), 404
    else:
        return jsonify(db.get_all("foods")), 200
Beispiel #16
0
def pushStart():
    """
    Starts pushing-loop, preferably, this is started as a thread
    """
    # Seed the work queue from whatever rows are already persisted.
    for queued in db.get_all(QueueRow):
        q.put(queued)

    # Spin up both background workers.
    for worker in (queuePusher, runDoneQueue):
        threading.Thread(target=worker).start()
Beispiel #17
0
def list_command():
    """Print a pretty table of every stored command name and file path."""
    _, session = init_db()
    commands = get_all(session)
    # Comprehensions replace the original's manual append loops.
    table = {
        'name': [cmd.command_name for cmd in commands],
        'filepath': [cmd.filepath for cmd in commands],
    }

    print(tabulate(table, headers='keys', tablefmt="pretty"))
Beispiel #18
0
def manage_accounts():
    """Render the account-management page; non-superusers are bounced home."""
    if not current_user.is_superuser:
        return redirect('/')

    return render_template(
        'auth/manage.html',
        users=get_all(User),
        errors=[],
        info=request.args.get('info', None),
    )
def approve_all(message=None):
    """Approve every tracked submission and purge it from the database.

    If `message` is given, it is sent to each non-deleted post's author
    along with the post URL.
    """
    global reddit
    posts = db.get_all()
    # Guard clause instead of the original's `!= None` wrapper (PEP 8: use `is`).
    if posts is None:
        return
    for entry in posts:
        submission = reddit.submission(entry.id)
        print(submission.title)
        if not check_post_deleted(submission):
            submission.mod.approve()
            if message is not None:
                submission.author.message(entry.url, message)
        db.remove_entry(entry.id)
Beispiel #20
0
    def datacores(self, item):
        """Attach the list of datacore requirements to `item` (under 'datacores')."""
        typeid = item['typeID']
        item['datacores'] = []
        # FIX: `(typeid)` is just a parenthesized scalar, not a tuple --
        # DB-API parameter binding expects a sequence, so use `(typeid,)`.
        datacores = get_all(g_datacores['sql'], (typeid,))

        for datacore in datacores:
            dc = {
                'typeID': datacore[g_datacores['typeID']],
                'quantity': datacore[g_datacores['quantity']]
            }

            # Names are only resolved when the instance was built with names=True.
            if self.names:
                dc['typeName'] = datacore[g_datacores['typeName']]

            item['datacores'].append(dc)
Beispiel #21
0
async def register_slave(websocket, wrapper):
    """Register a slave agent: store its resources/attributes and reply with its id."""
    print("Registering slave...")

    def extract(values):
        # Flatten protobuf Value entries into [name, type, payload] triples.
        # (The original duplicated this loop verbatim for resources and attributes.)
        out = []
        for value in values:
            if value.type == messages_pb2.Value.SCALAR:
                out.append([value.name, value.type, value.scalar.value])
            elif value.type == messages_pb2.Value.SET:
                out.append([value.name, value.type, value.set.item])
            elif value.type == messages_pb2.Value.RANGE:
                out.append([value.name, value.type, value.ranges.range])
            elif value.type == messages_pb2.Value.TEXT:
                out.append([value.name, value.type, value.text.value])
        return out

    # build agent info
    resources = extract(wrapper.register_slave.slave.resources)
    attributes = extract(wrapper.register_slave.slave.attributes)

    # add agent to db
    agent_id = db.add_agent(resources, attributes, "webs")
    sockets[websocket] = agent_id
    print(db.get_all())

    # send response message
    response = messages_pb2.WrapperMessage()
    response.slave_registered.slave_id = str(agent_id)
    await websocket.send(response.SerializeToString())
Beispiel #22
0
def message(event):
    """Handle a LINE text message: store `key -> value` pairs or echo a stored value."""
    if event['message']['type'] != 'text':
        return
    text = event['message']['text'].split()
    reply_token = event['replyToken']
    # "key -> value" / "key = value" / "key == value" forms.
    # FIX: the original list contained '=' twice; membership is unchanged.
    if len(text) >= 3 and text[1] in ('->', '=', '=='):
        line_api.reply_message(reply_token, 'success')
        db.insert(text[0], text[2])
        return
    # FIX: guard against whitespace-only messages (text[0] would IndexError).
    if not text:
        return
    # "key=value" written without spaces. The original pattern r'[==]' is a
    # character class containing only '=', so r'=' is equivalent.
    text2 = re.split(r'=', text[0])
    if len(text2) == 2 and len(text) == 1:
        line_api.reply_message(reply_token, 'success')
        db.insert(text2[0], text2[1])
        return
    reply = db.get_all()
    if text[0] in reply:
        line_api.reply_message(reply_token, reply[text[0]])
        return
Beispiel #23
0
    def do_GET(self):
        """Serve whitelisted static assets and the index page; anything else is 404."""
        o = urlsplit(self.path)

        # Only index and ALLOW_LIST
        if o.path != '/' and o.path not in ALLOW_LIST:
            self.send_error(404)
            return

        # Static-asset branch: stream the file verbatim with its MIME type.
        if o.path in ALLOW_LIST:
            print('[o.path]', o.path)
            ext = get_ext(o.path)

            # Path traversal is prevented only by the ALLOW_LIST check above.
            f = DIR / o.path.lstrip('/')
            data = f.read_bytes()

            self.send_response(200)
            self.send_header('Content-Type', MIME_BY_CONTENTYPE[ext])
            self.send_header('Content-length', len(data))
            self.end_headers()

            self.wfile.write(data)
            return

        # Index branch: number rows in insertion order, then render newest-first.
        table_rows = []
        for i, x in reversed(list(enumerate(db.get_all(reversed=False), 1))):
            table_rows.append(f'''
            <tr>
                <td>{i}</td>
                <td>{x['date']}</td>
                <td>{str(x['cold']).zfill(5)}</td>
                <td>{str(x['hot']).zfill(5)}</td>
            </tr>
            ''')

        text = HTML_TEMPLATE_INDEX \
            .replace('{{ table_rows }}', ''.join(table_rows))

        self.send_response(200)
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Connection', 'close')
        self.end_headers()

        self.wfile.write(text.encode('utf-8'))
Beispiel #24
0
    def on_get(self, req, resp):
        """Falcon handler: list service summaries matching the query-string filters."""
        list_of_services = []
        services = db.get_all()
        query_filter = req.params
        # Fields copied into each summary row.
        summary_fields = ("name", "serviceType", "serviceState", "id",
                          "href", "createdBy", "createdOn")
        # FIX: .iteritems() is Python-2-only; .items() works on both 2 and 3.
        for k, v in services.items():
            if not self.filter_data(query_filter, v):
                continue
            service_summary = {field: v.get(field) for field in summary_fields}
            list_of_services.append(service_summary)

        resp.status = falcon.HTTP_200
        resp.body = success_response(list_of_services)
Beispiel #25
0
def request(data, type):
    # Dispatch an incoming network message by its `type` string (Python 2 code).
    # NOTE(review): the parameter `type` shadows the builtin; kept as-is since
    # it is part of the callable's interface.
    print 'request starts!'
    if (type == 'db_add_or_update'):
        print "type: db_add_or_update"
        db.add_or_update_no_push(data)
        # GUI notification must run inside the GTK lock pair.
        gtk.gdk.threads_enter()
        gui.notify(data)
        gtk.gdk.threads_leave()

#    elif (type == 'textMessage'):
#        print "type: textMessage"
#        msg = TextMessage(data.src, data.dst, data.msg)
#        db.add_or_update(msg)
#        msg.src_object = db.get_one_by_id(Employee, msg.src)
#        msg.dst_object = db.get_one_by_id(Employee, msg.dst)
#        db.commit()
#        gtk.gdk.threads_enter()
#        gui.notify(msg)
#        gtk.gdk.threads_leave()

    elif (type == 'pong'):
        """
        Recieves a list with ID
        """

        print 'type: pong'
        onlineLista = data

        """
        Updates whos online in db
        """

        print 'dbupdate to false'
        # Mark everyone offline first, then flip the reported IDs back online.
        for mongo in db.get_all(Employee):
            mongo.online=False

        print 'dbupdate to true from list'
        for id in onlineLista:
            user = db.get_one_by_id(Employee, id)
            user.online = True
        db.commit()
Beispiel #26
0
def message(event):
    """Handle a LINE text message: store key/value pairs (rejecting URL keys)
    or echo a stored value back to the sender."""
    if event['message']['type'] != 'text':
        return
    text = event['message']['text'].split()
    reply_token = event['replyToken']
    # Hoisted: the same URL pattern was written out twice in the original.
    url_pattern = r'https?://[\w/:%#\$&\?\(\)~\.=\+\-]+'
    # "key -> value" / "key = value" / "key == value" forms.
    # FIX: the original list contained '=' twice; membership is unchanged.
    if len(text) >= 3 and text[1] in ('->', '=', '=='):
        if re.match(url_pattern, text[0]):
            return
        db.insert(text[0], text[2])
        line_api.reply_message(reply_token, 'success')
        return
    # "key=value" without spaces. r'[==]' is a character class containing only
    # '=', so r'=' splits identically.
    text2 = re.split(r'=', event['message']['text'])
    if len(text2) >= 2:
        if re.match(url_pattern, text2[0]):
            return
        db.insert(text2[0], text2[1])
        line_api.reply_message(reply_token, 'success')
        return
    # FIX: guard against whitespace-only messages (text[0] would IndexError).
    if not text:
        return
    reply = db.get_all()
    if text[0] in reply:
        line_api.reply_message(reply_token, reply[text[0]])
        return
Beispiel #27
0
    def perfect_materials(self, item):
        """Populate item['perfectMaterials'] with per-material quantities and
        ME/PE waste flags derived from the blueprint materials query."""
        typeid = item['typeID']
        item['perfectMaterials'] = []
        materials = get_all(g_perfect_materials['sql'], (typeid, typeid, typeid))

        # This dict will be used to keep track of which materials have been seen. If a material
        # is listed in the regular list (affected by ME waste) and it appears in the extra
        # material list (not affected by ME waste), it must have PE waste applied to it.
        # This also requires the result set from the materials query to be sorted with normal
        # materials first followed by extra materials.
        requiredMaterials = dict()

        for material in materials:
            if material[g_perfect_materials['quantity']] > 0:
                # NOTE(review): `typeid` is rebound here, shadowing the blueprint
                # typeID read above -- harmless today, fragile if code is added
                # after the loop.
                typeid = material[g_perfect_materials['typeID']]
                wasteME = bool(material[g_perfect_materials['waste']])

                # PE always applies if ME waste applies.
                wastePE = wasteME

                # A second sighting means this material also occurs in the
                # extra-materials list, so PE waste applies regardless of ME.
                if typeid in requiredMaterials:
                    wastePE = True
                else:
                    requiredMaterials[typeid] = True

                pm = {
                    'typeID': typeid,
                    'quantity': float(material[g_perfect_materials['quantity']]),
                    'dmg': float(material[g_perfect_materials['dmg']]),
                    'wasteME': wasteME,
                    'wastePE': wastePE
                }

                # Names are only attached when the instance was built with names=True.
                if self.names:
                    pm['name'] = material[g_perfect_materials['name']]

                item['perfectMaterials'].append(pm)
def main():
    """Authenticate with Reddit, then run the moderation loop forever.

    Each iteration: re-check early posts for an AutoModerator comment,
    process new submissions, handle inbox messages, and purge stale
    database entries. On any error it approves everything and restarts.
    """
    global reddit
    global has_bot_started
    print(f'{CLIENT_ID}')
    try:
        reddit = praw.Reddit(client_id=CLIENT_ID,
                             client_secret=CLIENT_SECRET,
                             user_agent=USER_AGENT,
                             username=USERNAME,
                             password=PASSWORD)
        print(f"Authenticated as {reddit.user.me()}")
    except Exception:
        # NOTE(review): this is not an f-string, so '{CREDENTIALS_FILENAME}'
        # is printed literally -- likely a missing f prefix.
        print(
            'Invalid credentials, please enter correct credentials into {CREDENTIALS_FILENAME}'
        )
        return

    try:

        subreddit = reddit.subreddit(SUBREDDIT)
        stream = subreddit.stream.submissions(pause_after=0)
        early_posts = set()

        db.initialize_database()
        approve_all()
        print('Approved remaining posts, waiting 5 seconds before startup:')
        time.sleep(5)

        # Ingore all older posts
        # (pause_after=0 makes the stream yield None once the backlog is drained)
        for submission in stream:
            if submission == None:
                break
        has_bot_started = True
        while True:

            ignored_posts = set()

            # Revisit posts AutoModerator had not commented on yet.
            # (early_posts holds submission ids, not submission objects.)
            for submission in early_posts:
                if submission == None:
                    break
                updated_submission = reddit.submission(submission)
                if check_post_deleted(updated_submission):
                    ignored_posts.add(submission)
                    continue

                # Check if automoderator has commented
                automoderator = mod_in_comments(updated_submission, MODERATOR)
                if automoderator:
                    process_submission(reddit, updated_submission)
                    ignored_posts.add(submission)

            # Clear out ignored_posts
            for submission in ignored_posts:
                early_posts.remove(submission)

            # Check new submissions
            for submission in stream:
                if submission == None:
                    break
                if check_post_deleted(submission):
                    continue

                # Check if automoderator has commented
                automoderator = mod_in_comments(submission, MODERATOR)
                if not automoderator:
                    early_posts.add(submission.id)
                else:
                    process_submission(reddit, submission)

            # Check inbox
            for message in reddit.inbox.unread(limit=None):
                if isinstance(message, praw.models.Message):
                    print(f'Processing new message: {message.subject[4:]}')
                    # Mark as read so it won't do it again later
                    process_message(reddit, message)
                    message.mark_read()

            # Remove all old posts from database
            posts = db.get_all()
            if posts != None:
                for entry in posts:
                    submission = reddit.submission(entry.id)
                    if check_post_deleted(submission):
                        db.remove_entry(entry.id)
                    elif hour_difference(submission.created_utc,
                                         time.time()) >= MAX_TIME:
                        db.remove_entry(entry.id)
                        reddit.redditor(
                            entry.author).message(OLD_RESPONSE_MESSAGE)
    except Exception:
        # NOTE(review): the recursive self-restart below grows the stack on
        # repeated failures; a loop would be safer. Also the `== None`
        # comparisons above should be `is None` (PEP 8).
        print('An error has occurred, approving all and restarting...')
        try:
            approve_all()
        except Exception:
            print(
                'Approving failed, closing bot and sending modmail describing situation'
            )
            reddit.subreddit(SUBREDDIT).message(UNEXPECTED_SHUTDOWN_MESSAGE)
            return
        time.sleep(60)
        main()
Beispiel #29
0
def run(phrase):
    """Execute one phase ('p1'..'p7') of the company/contact staging pipeline.

    Each phase reads the Excel outputs of the previous phase, applies
    validation / dedup / enrichment via `vd`, writes review and backup
    workbooks, and (in later phases) loads the staging tables via `db`.
    """
    # Deduplicate companies, find common companies and contacts
    if phrase == 'p1':
        print('Phrase 1: Deduplicate companies, find common companies and contacts.')

        company_raw_list = pd.read_excel(rawfilepath, sheet_name='Company', sort=False, dtype=str)
        contact_raw_list = pd.read_excel(rawfilepath, sheet_name='Contact', sort=False, dtype=str)
        company_init_list = vd.init_list(company_raw_list, company_colnames, 'Company')
        contact_init_list = vd.init_list(contact_raw_list, contact_colnames, 'Contact', sourcename, timestamp)
        company_common_list, contact_common_list = vd.validate_common(company_init_list, contact_init_list)
        company_duplicate_list, company_duplicate_full, company_common_list, contact_common_list = vd.dedup_company(company_common_list, contact_common_list)

        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        backupwriter = pd.ExcelWriter(backupfilepath, engine='openpyxl')
        company_duplicate_list.to_excel(reviewwriter, index=False, header=True, columns=company_dup_colnames, sheet_name='1_Duplicate_Company')
        company_duplicate_full.to_excel(reviewwriter, index=False, header=True, columns=company_dup_colnames, sheet_name='1_Duplicate_Company_Full')
        company_common_list.to_excel(backupwriter, index=False, header=True, columns=company_colnames, sheet_name='company_common_list')
        contact_common_list.to_excel(backupwriter, index=False, header=True, columns=contact_colnames, sheet_name='contact_common_list')
        reviewwriter.save()
        reviewwriter.close()
        backupwriter.save()
        backupwriter.close()

        print('Check {}, {}, deduplicate companies need review. {} contains full list of duplicate companies.'.format(reviewfilepath, '1_Duplicate_Company', '1_Duplicate_Company_Full'))
        print('{} companies are duplicates in this load.'.format(len(company_duplicate_list)))

    # Merge deduplicate companies and format relative contacts
    elif phrase == 'p2':
        print('Phrase 2: Merge deduplicate companies and clean relative contacts.')

        company_common_list = pd.read_excel(backupfilepath, sheet_name='company_common_list', sort=False)
        contact_common_list = pd.read_excel(backupfilepath, sheet_name='contact_common_list', sort=False)
        company_duplicate_list = pd.read_excel(reviewfilepath, sheet_name='1_Duplicate_Company', sort=False)
        company_dedup_list, contact_format_list = vd.dedup_fix(company_common_list, contact_common_list, company_duplicate_list)
        company_db_return = db.get_all(company_load_colnames, 'Company')
        if company_db_return.empty:
            company_existing_list = company_db_return
        else:
            company_dedup_list, company_existing_list = vd.dedup_comany_db(company_dedup_list, company_db_return)
        company_dedup_list = vd.map_state(company_dedup_list)
        # NOTE(review): `writer.book = ...` and `writer.save()` rely on older
        # pandas/openpyxl behavior (deprecated in pandas >= 1.5) -- confirm the
        # pinned pandas version before upgrading.
        backupwriter = pd.ExcelWriter(backupfilepath, engine='openpyxl')
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        backupbook = load_workbook(backupwriter.path)
        reviewbook = load_workbook(reviewwriter.path)
        backupwriter.book = backupbook
        reviewwriter.book = reviewbook
        company_existing_list.to_excel(reviewwriter, index=False, header=True, columns=company_colnames, sheet_name='2_Existing_Company')
        company_dedup_list.to_excel(backupwriter, index=False, header=True, columns=list(company_dedup_list), sheet_name='company_dedup_list')
        contact_format_list.to_excel(backupwriter, index=False, header=True, columns=contact_colnames, sheet_name='contact_format_list')
        backupwriter.save()
        backupwriter.close()
        reviewwriter.save()
        reviewwriter.close()
        print('Check {}, {}.'.format(reviewfilepath, '2_Existing_Company'))
        print('{} companies already exists in local staging table.'.format(len(company_existing_list)))

    # Run web scraper to enrich company details
    elif phrase == 'p3':
        print('Phrase 3: Run web scraper to enrich company details.')

        company_dedup_list = pd.read_excel(backupfilepath, sheet_name='company_dedup_list', sort=False)

        company_scrapy_return = qichacha(company_dedup_list[company_dedup_list['db_New'] != False], scrapyfilepath, 'company_scrapy_return')
        company_scrapy_return.to_excel(scrapyfilepath, index=False, header=True, columns=list(company_scrapy_return), sheet_name='company_scrapy_return')

    # Enrich companies with web scraper returns
    elif phrase == 'p4':
        print('Phrase 4: Enrich companies with web scraper returns')

        company_scrapy_return = pd.read_excel(scrapyfilepath, sheet_name='company_scrapy_return', sort=False)
        company_scrapy_return = vd.init_list(company_scrapy_return, list(company_scrapy_return))
        company_scrapy_return['Confidence'] = company_scrapy_return.apply(getConfidence, axis=1)
        company_scrapy_return['境外公司'] = company_scrapy_return['境外公司'].replace({0: False, 1: True})
        company_scrapy_return.to_excel(scrapyfilepath, index=False, header=True, columns=list(company_scrapy_return), sheet_name='company_scrapy_return')
        company_dedup_list = pd.read_excel(backupfilepath, sheet_name='company_dedup_list', sort=False)
        company_scrapy_list, company_scrapy_verify = vd.enrich_company(company_dedup_list, company_scrapy_return, company_colnames)

        backupwriter = pd.ExcelWriter(backupfilepath, engine='openpyxl')
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        backupbook = load_workbook(backupwriter.path)
        reviewbook = load_workbook(reviewwriter.path)
        backupwriter.book = backupbook
        reviewwriter.book = reviewbook
        company_scrapy_list.to_excel(backupwriter, index=False, header=True, columns=company_colnames, sheet_name='company_scrapy_list')
        company_scrapy_verify.to_excel(reviewwriter, index=False, header=True, columns=company_colnames, sheet_name='3_No_Address_Company')
        backupwriter.save()
        backupwriter.close()
        reviewwriter.save()
        reviewwriter.close()
        # db.load_staging(company_scrapy_return, list(company_scrapy_return), 'Scrapy', sourcename, timestamp)

        print('Check {}, {}, enrich companies without address.'.format(reviewfilepath, '3_No_Address_Company'))
        print('{} companies remain no address.'.format(len(company_scrapy_verify)))
    # Enrich company with business return, validate contact
    elif phrase == 'p5':
        print('Phrase 5: Enrich company with business return, validate contact.')

        company_business_return = pd.read_excel(reviewfilepath, sheet_name='3_No_Address_Company', sort=False)
        company_scrapy_list = pd.read_excel(backupfilepath, sheet_name='company_scrapy_list', sort=False)
        contact_format_list = pd.read_excel(backupfilepath, sheet_name='contact_format_list', sort=False)
        company_load_list = vd.enrich_business(company_scrapy_list, company_business_return)
        contact_db_return = db.get_all(contact_load_colnames, 'Contact')
        if contact_db_return.empty:
            contact_dedup_list = contact_format_list
        else:
            contact_dedup_list = vd.dedup_contact_db(contact_format_list, contact_db_return)
        contact_validate_list = vd.validate_contacts(contact_dedup_list, contact_colnames, company_load_list)
        company_load_list = company_load_list[company_load_list['Load'] == True]

        db.load_staging(company_load_list, company_load_colnames, 'Company', sourcename, timestamp)
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        reviewbook = load_workbook(reviewwriter.path)
        reviewwriter.book = reviewbook
        contact_validate_list.to_excel(reviewwriter, index=False, header=True, columns=contact_colnames, sheet_name='4_Validate_Contact')
        company_load_list.to_excel(reviewwriter, index=False, header=True, columns=company_load_colnames, sheet_name='5_Company_Load')
        reviewwriter.save()
        reviewwriter.close()


        print('Check {}, {}, contacts need to review.'.format(reviewfilepath, '4_Validate_Contact'))
        print('{} companies load into staging table.'.format(len(company_load_list)))
        print('{} contacts needs review'.format(len(contact_validate_list[contact_validate_list['Load'] == False])))

    # Enrich contacts with business return
    elif phrase == 'p6':
        contact_business_list = pd.read_excel(reviewfilepath, sheet_name='4_Validate_Contact', sort=False)
        contact_load_list = contact_business_list[contact_business_list['Load'] == True]
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        reviewbook = load_workbook(reviewwriter.path)
        reviewwriter.book = reviewbook
        contact_load_list.to_excel(reviewwriter, index=False, header=True, columns=contact_load_colnames, sheet_name='5_Contact_Load')
        reviewwriter.save()
        reviewwriter.close()
        db.load_staging(contact_load_list, contact_load_colnames, 'Contact', sourcename, timestamp)

        print('{} contacts load into staging table.'.format(len(contact_load_list[contact_load_list['Load'] != False])))
    # Cross-check raw vs loaded data; write logs and summary tables
    elif phrase == 'p7':
        print('Cross-check and log merge, deletion, modification record.')
        company_raw_list = pd.read_excel(rawfilepath, sheet_name='Company', sort=False)
        contact_raw_list = pd.read_excel(rawfilepath, sheet_name='Contact', sort=False)
        contact_raw_list['Source_ID'] = list(range(1, (len(contact_raw_list) + 1)))
        contact_raw_list['Source_ID'] = contact_raw_list['Source_ID'].apply(lambda x: sourcename + '_' + timestamp + '_' + 'Contact' + '_' + str(x))
        company_load_list = pd.read_excel(reviewfilepath, sheet_name='5_Company_Load', sort=False)
        contact_load_list = pd.read_excel(reviewfilepath, sheet_name='5_Contact_Load', sort=False)
        company_logs = vd.staging_log(company_raw_list, company_load_list, 'Company', logs_columns)
        db.load_staging(company_logs, logs_columns, 'Logs', sourcename, timestamp)
        contact_logs = vd.staging_log(contact_raw_list, contact_load_list, 'Contact', logs_columns)
        db.load_staging(contact_logs, logs_columns, 'Logs', sourcename, timestamp)

        company_duplicate_list = pd.read_excel(reviewfilepath, sheet_name='1_Duplicate_Company_Full', sort=False)
        company_existing_list = pd.read_excel(reviewfilepath, sheet_name='2_Existing_Company', sort=False)
        company_standard_list = pd.read_excel(reviewfilepath, sheet_name='3_No_Address_Company', sort=False)
        company_summary = vd.staging_summary('Company', company_raw_list, company_duplicate_list, company_existing_list, company_standard_list, company_load_list)
        db.load_staging(company_summary, list(company_summary), 'Summary', sourcename, timestamp)
        contact_validate_list = pd.read_excel(reviewfilepath, sheet_name='4_Validate_Contact', sort=False)
        contact_duplicate_list = contact_validate_list[contact_validate_list['vc_Deduplicate'] == False]
        contact_existing_list = contact_validate_list[contact_validate_list['db_New'] == False]
        contact_standard_list = contact_validate_list[contact_validate_list['Load'] == False]
        contact_summary = vd.staging_summary('Contact', contact_raw_list, contact_duplicate_list, contact_existing_list, contact_standard_list, contact_load_list)
        db.load_staging(contact_summary, list(contact_summary), 'Summary', sourcename, timestamp)
# Compare the new and old file trees and generate diff dicts.
new = fo.get_all_file(ROOT + "/new_version")
old = fo.get_all_file(ROOT + "/old_version")
basic_diff_list = fo.basic_diff(new, old)  # entries changed since the old version
all_diff_list = fo.basic_diff(new, {})     # every entry (diff against an empty baseline)

fo.all_to_temp(all_diff_list)
db.update_all_index(all_diff_list)

# Sync db: publish the new version and index the incremental diff.
db.pub_new(latest_version)
db.update_index(latest_version, basic_diff_list)

QuerySet = db.get_all()
for Query in QuerySet:
    if Query['pathindex'] != "[]":
        try:
            os.mkdir("version_" + Query["version"])
        except OSError:
            # Directory may already exist from a previous run; keep going.
            pass

# Generate & copy files for every stored version.
print(QuerySet)  # fix: print() function call (was Python 2 print-statement syntax)
fo.make_file_from_db(QuerySet)
fo.update_all_version(QuerySet)
Beispiel #31
0
 def get(self, lang):
     """Look up all documents for *lang*; 404 when none are stored."""
     document = db.get_all(lang)
     if document is None:
         return None, 404
     payload = json.dumps(document)
     return Response(payload, mimetype='application/json')
Beispiel #32
0
def show():
    """Fetch every stored record and print it to stdout."""
    print(db.get_all())


# def display(weather, restaurant, events):
Beispiel #33
0
def run(phrase: str) -> None:
    """Drive one stage of the company/contact staging pipeline.

    *phrase* selects the stage:
      'p1'     — dedupe companies within the source file and against the staging table.
      'p2'     — run the web scraper (qichacha) to enrich company details.
      'p3'     — merge scraper returns into companies; validate contacts.
      'p4'     — apply business-review enrichment, then load companies/contacts,
                 logs and summaries into the staging tables.
      'Reload' — re-load companies/contacts straight from the review workbook.

    Relies on module-level configuration defined elsewhere in the file:
    rawfilepath, backupfilepath, reviewfilepath, scrapyfilepath, sourcename,
    timestamp, and the *_colnames column lists.  Side effects: writes Excel
    workbooks and loads rows into the staging database via ``db``.

    NOTE(review): the user-facing messages say "Phrase N"; "Phase N" is
    presumably intended — confirm before changing runtime strings.
    """
    # Deduplicate companies, find common companies and contacts
    if phrase == 'p1':
        print('Phrase 1: Deduplicate companies, find common companies and contacts.')

        # NOTE(review): pandas.read_excel has no `sort` kwarg in current
        # releases — presumably this targets a pinned older pandas; confirm.
        company_raw_list = pd.read_excel(rawfilepath, sheet_name='Company', sort=False, dtype=str)
        contact_raw_list = pd.read_excel(rawfilepath, sheet_name='Contact', sort=False, dtype=str)
        # Initialization
        company_init_list = vd.init_list(company_raw_list, company_colnames, mode='Company')
        contact_init_list = vd.init_list(contact_raw_list, contact_colnames, mode='Contact', sourcename=sourcename, timestamp=timestamp, company=company_init_list)
        # Deduplication within source data
        company_common_list, contact_common_list = vd.validate_common(company_init_list, contact_init_list)
        # Map state abbreviation and enrich state
        company_common_list = vd.map_state(company_common_list)
        company_duplicate_list, company_duplicate_full, company_dedup_list, contact_common_list = vd.dedup_company(company_common_list, contact_common_list)
        # Deduplication against staging table
        company_db_return = db.get_all(company_load_colnames, 'Company')
        if company_db_return.empty:
            # Nothing staged yet: no existing companies to subtract.
            company_existing_list = company_db_return
        else:
            # NOTE(review): helper is spelled `dedup_comany_db` in vd (sic).
            company_dedup_list, company_existing_list = vd.dedup_comany_db(company_dedup_list, company_db_return)
        # Keep companies not duplicates
        # company_dedup_list = company_dedup_list[company_dedup_list['Load'] == True]

        print('Check {}, {}, deduplicate companies need review. {} contains full list of duplicate companies.'.format(reviewfilepath, '1_Duplicate_Company', '1_Duplicate_Company_Full'))
        print('{} companies are duplicates in this load.'.format(len(company_duplicate_list)))
        print('Check {}, {}.'.format(reviewfilepath, '2_Existing_Company'))
        print('{} companies already exists in local staging table.'.format(len(company_existing_list)))

        # Persist review sheets (for humans) and backup sheets (for later stages).
        backupwriter = pd.ExcelWriter(backupfilepath, engine='openpyxl')
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        company_duplicate_list.sort_values(by=['ComName_temp']).to_excel(reviewwriter, index=False, header=True, columns=company_dup_colnames, sheet_name='1_Duplicate_Company')
        company_duplicate_full.sort_values(by=['ComName_temp']).to_excel(reviewwriter, index=False, header=True, columns=company_dup_colnames, sheet_name='1_Duplicate_Company_Full')
        company_existing_list.sort_values(by=['ComName_temp']).to_excel(reviewwriter, index=False, header=True, columns=list(company_existing_list), sheet_name='2_Existing_Company')
        company_dedup_list.to_excel(backupwriter, index=False, header=True, columns=company_colnames, sheet_name='company_dedup_list')
        contact_common_list.to_excel(backupwriter, index=False, header=True, columns=contact_colnames, sheet_name='contact_common_list')
        # NOTE(review): ExcelWriter.save() is deprecated/removed in newer
        # pandas (close() saves) — confirm against the pinned version.
        backupwriter.save()
        backupwriter.close()
        reviewwriter.save()
        reviewwriter.close()

    # Run web scraper to enrich company details
    elif phrase == 'p2':
        print('Phrase 2: Run web scraper to enrich company details.')

        company_dedup_list = pd.read_excel(backupfilepath, sheet_name='company_dedup_list', sort=False)
        # company_dedup_list = company_dedup_list[company_dedup_list['db_New'] != False]
        # Scrape only loadable new companies that have no address in either language.
        company_dedup_list = company_dedup_list[(company_dedup_list['db_New'] != False) & (company_dedup_list['Load'] == True) & pd.isnull(company_dedup_list['Billing_Address']) & pd.isnull(company_dedup_list['Billing_Address_CN'])]
        company_scrapy_return = qichacha(company_dedup_list, scrapyfilepath, 'company_scrapy_return')
        company_scrapy_return.to_excel(scrapyfilepath, index=False, header=True, columns=list(company_scrapy_return), sheet_name='company_scrapy_return')

    # Enrich companies with web scraper returns,  validate contact
    elif phrase == 'p3':
        print('Phrase 3: Enrich companies with web scraper returns')

        company_scrapy_return = pd.read_excel(scrapyfilepath, sheet_name='company_scrapy_return', sort=False)
        company_scrapy_return = vd.init_list(company_scrapy_return, list(company_scrapy_return), mode='')
        company_scrapy_return['Confidence'] = company_scrapy_return.apply(getConfidence, axis=1)
        # Normalize the 0/1 "overseas company" flag to booleans.
        company_scrapy_return['境外公司'] = company_scrapy_return['境外公司'].replace({0: False, 1: True})
        company_scrapy_return.to_excel(scrapyfilepath, index=False, header=True, columns=list(company_scrapy_return), sheet_name='company_scrapy_return')
        company_dedup_list = pd.read_excel(backupfilepath, sheet_name='company_dedup_list', sort=False)
        company_scrapy_list, company_scrapy_verify = vd.enrich_company(company_dedup_list, company_scrapy_return, company_colnames)
        company_scrapy_verify = company_scrapy_verify[(company_scrapy_verify['vc_Deduplicate'] == True) & (company_scrapy_verify['db_New'] == True)]
        print('Check {}, {}, enrich companies without address.'.format(reviewfilepath, '3_No_Address_Company'))
        print('{} companies remain no address.'.format(len(company_scrapy_verify)))
        print('Phrase 4: Validate contact.')

        # Validate contact
        contact_common_list = pd.read_excel(backupfilepath, sheet_name='contact_common_list', sort=False)
        contact_db_return = db.get_all(contact_load_colnames, 'Contact')
        if contact_db_return.empty:
            contact_dedup_list = contact_common_list
        else:
            contact_dedup_list = vd.dedup_contact_db(contact_common_list, contact_db_return)

        contact_validate_list = vd.validate_contacts(contact_dedup_list, contact_colnames, company_scrapy_list)
        contact_review_list = contact_validate_list[contact_validate_list['Load'] == False]
        contact_validate_list = contact_validate_list[contact_validate_list['Load'] == True]

        # Re-open both workbooks in append mode so earlier sheets survive.
        # NOTE(review): assigning to ExcelWriter.book is deprecated in newer
        # pandas — confirm against the pinned version.
        backupwriter = pd.ExcelWriter(backupfilepath, engine='openpyxl')
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        backupbook = load_workbook(backupwriter.path)
        reviewbook = load_workbook(reviewwriter.path)
        backupwriter.book = backupbook
        reviewwriter.book = reviewbook
        company_scrapy_list.to_excel(backupwriter, index=False, header=True, columns=company_colnames, sheet_name='company_scrapy_list')
        company_scrapy_verify.sort_values(by=['ComName_temp']).to_excel(reviewwriter, index=False, header=True, columns=company_colnames, sheet_name='3_No_Address_Company')
        contact_validate_list.sort_values(by=['First_Name', 'Last_Name', 'First_Name_CN', 'Last_Name_CN']).to_excel(backupwriter, index=False, header=True, columns=contact_load_colnames, sheet_name='contact_validate_list')
        contact_review_list.to_excel(reviewwriter, index=False, header=True, columns=contact_colnames, sheet_name='4_Validate_Contact')
        backupwriter.save()
        backupwriter.close()
        reviewwriter.save()
        reviewwriter.close()

        print('Check {}, {}, contacts need to review.'.format(reviewfilepath, '4_Validate_Contact'))
        print('{} contacts needs review'.format(len(contact_review_list)))

    # Enrich companies, contacts with business return, load company and contact into staging table
    elif phrase == 'p4':
        print('Phrase 5: Enrich companies')
        company_duplicate_review = pd.read_excel(reviewfilepath, sheet_name='1_Duplicate_Company', sort=False)
        company_existing_review = pd.read_excel(reviewfilepath, sheet_name='2_Existing_Company', sort=False)
        company_address_review = pd.read_excel(reviewfilepath, sheet_name='3_No_Address_Company', sort=False)
        company_scrapy_list = pd.read_excel(backupfilepath, sheet_name='company_scrapy_list', sort=False)

        # Enrich companies from business review
        company_load_list = vd.enrich_business(company_scrapy_list, company_duplicate_review)
        company_load_list = vd.enrich_business(company_load_list, company_existing_review)
        # Enrich companies without address
        company_load_list = vd.enrich_no_address(company_load_list, company_address_review)
        company_load_list = vd.enrich_business(company_load_list, company_address_review)
        company_min_drop = company_address_review[company_address_review['Load'] == False]
        print('Check {}, {}, companies cannot meet minimum standard.'.format(reviewfilepath, '5_Company_Drop'))
        print('{} companies are dropped'.format(len(company_min_drop)))

        # Merge deduplicate companies and format relative contacts
        print('Phrase 6: Merge deduplicate companies and clean relative contacts. Enrich contacts with business return.')
        contact_validate_review = pd.read_excel(reviewfilepath, sheet_name='4_Validate_Contact', sort=False)
        contact_validate_list = pd.read_excel(backupfilepath, sheet_name='contact_validate_list', sort=False)
        contact_load_list = vd.enrich_business(contact_validate_list, contact_validate_review)
        temp, contact_load_list = vd.dedup_fix(company_load_list, contact_load_list, company_duplicate_review)
        temp, contact_load_list = vd.dedup_fix(company_load_list, contact_load_list, company_existing_review)
        contact_load_list = vd.enrich_contact(company_load_list, contact_load_list, company_load_colnames)
        # contact_load_list = contact_load_list[contact_load_list['Load'] != False]
        # Contacts whose company was dropped cannot load either.
        contact_load_list.loc[~contact_load_list['Source_Company_ID'].isin(company_load_list['Source_ID'].tolist()), 'Load'] = False
        contact_no_company = contact_load_list[~contact_load_list['Source_Company_ID'].isin(company_load_list['Source_ID'].tolist())]
        # NOTE(review): chained assignment on a filtered frame — likely a
        # pandas SettingWithCopyWarning; consider .copy() before assigning.
        contact_no_company['Reject_Reason'] = 'No company;  '
        contact_min_list = contact_validate_list[contact_validate_list['Load'] == False]
        contact_min_list = contact_min_list.append(contact_no_company)
        print('Check {}, {}, contacts  cannot meet minimum standard.'.format(reviewfilepath, '5_Contact_Drop'))
        print('{} contacts are dropped, because companies are dropped.'.format(len(contact_min_list)))

        # Load company and contact into staging table
        print('Phrase 7:  Load company and contact into staging table.')
        db.load_staging(company_load_list, company_load_colnames, 'Company', sourcename, timestamp)
        db.load_staging(contact_load_list[contact_load_list['Load']!=False], contact_load_colnames, 'Contact', sourcename, timestamp)
        print('{} companies load into staging table.'.format(len(company_load_list)))
        print('{} contacts load into staging table.'.format(len(contact_load_list)))

        print('Phrase 8: Cross-check and log merge, deletion, modification record.')
        # Loading logs
        company_raw_list = pd.read_excel(rawfilepath, sheet_name='Company', sort=False)
        contact_raw_list = pd.read_excel(rawfilepath, sheet_name='Contact', sort=False)
        # Rebuild the synthetic Source_ID: "<source>_<timestamp>_Contact_<row>".
        contact_raw_list['Source_ID'] = list(range(1, (len(contact_raw_list) + 1)))
        contact_raw_list['Source_ID'] = contact_raw_list['Source_ID'].apply(lambda x: sourcename + '_' + timestamp + '_' + 'Contact' + '_' + str(x))
        company_logs = vd.staging_log(company_raw_list, company_load_list, 'Company', logs_columns)
        db.load_staging(company_logs, logs_columns, 'Logs', sourcename, timestamp)
        contact_logs = vd.staging_log(contact_raw_list, contact_load_list, 'Contact', logs_columns)
        db.load_staging(contact_logs, logs_columns, 'Logs', sourcename, timestamp)

        # Loading summary
        company_duplicate_list = pd.read_excel(reviewfilepath, sheet_name='1_Duplicate_Company_Full', sort=False)
        company_existing_list = pd.read_excel(reviewfilepath, sheet_name='2_Existing_Company', sort=False)
        company_standard_list = pd.read_excel(reviewfilepath, sheet_name='3_No_Address_Company', sort=False)
        company_summary = vd.staging_summary('Company', company_raw_list, company_duplicate_list, company_existing_list, company_standard_list, company_load_list)
        db.load_staging(company_summary, list(company_summary), 'Summary', sourcename, timestamp)
        contact_validate_list = pd.read_excel(reviewfilepath, sheet_name='4_Validate_Contact', sort=False)
        contact_duplicate_list = contact_validate_list[contact_validate_list['vc_Deduplicate'] == False]
        contact_existing_list = contact_validate_list[contact_validate_list['db_New'] == False]
        contact_standard_list = contact_validate_list[contact_validate_list['Load'] == False]
        contact_summary = vd.staging_summary('Contact', contact_raw_list, contact_duplicate_list, contact_existing_list, contact_standard_list, contact_load_list)
        db.load_staging(contact_summary, list(contact_summary), 'Summary', sourcename, timestamp)
        # db.load_staging(company_scrapy_return, list(company_scrapy_return), 'Scrapy', sourcename, timestamp)

        # Append the final drop/load sheets to the review workbook.
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        reviewbook = load_workbook(reviewwriter.path)
        reviewwriter.book = reviewbook
        company_min_drop.to_excel(reviewwriter, index=False, header=True, columns=company_load_colnames, sheet_name='5_Company_Drop')
        # NOTE(review): contact drops are written with company_load_colnames —
        # contact_load_colnames looks intended; confirm before changing.
        contact_min_list.to_excel(reviewwriter, index=False, header=True, columns=company_load_colnames, sheet_name='5_Contact_Drop')
        company_load_list.to_excel(reviewwriter, index=False, header=True, columns=company_load_colnames, sheet_name='6_Company_Load')
        contact_load_list.to_excel(reviewwriter, index=False, header=True, columns=contact_load_colnames, sheet_name='6_Contact_Load')
        reviewwriter.save()
        reviewwriter.close()
        print('---------- Done.---------- ')
    elif phrase == 'Reload':
        # Re-load straight from the previously produced review workbook.
        company_load_list = pd.read_excel(reviewfilepath, sheet_name='6_Company_Load', sort=False)
        contact_load_list = pd.read_excel(reviewfilepath, sheet_name='6_Contact_Load', sort=False)
        db.load_staging(company_load_list, company_load_colnames, 'Company', sourcename, timestamp)
        db.load_staging(contact_load_list, contact_load_colnames, 'Contact', sourcename, timestamp)

        # Loading logs
        company_raw_list = pd.read_excel(rawfilepath, sheet_name='Company', sort=False)
        contact_raw_list = pd.read_excel(rawfilepath, sheet_name='Contact', sort=False)
        contact_raw_list['Source_ID'] = list(range(1, (len(contact_raw_list) + 1)))
        contact_raw_list['Source_ID'] = contact_raw_list['Source_ID'].apply(lambda x: sourcename + '_' + timestamp + '_' + 'Contact' + '_' + str(x))
        company_logs = vd.staging_log(company_raw_list, company_load_list, 'Company', logs_columns)
        db.load_staging(company_logs, logs_columns, 'Logs', sourcename, timestamp)
        contact_logs = vd.staging_log(contact_raw_list, contact_load_list, 'Contact', logs_columns)
        db.load_staging(contact_logs, logs_columns, 'Logs', sourcename, timestamp)

        # Loading summary
        company_duplicate_list = pd.read_excel(reviewfilepath, sheet_name='1_Duplicate_Company_Full', sort=False)
        company_existing_list = pd.read_excel(reviewfilepath, sheet_name='2_Existing_Company', sort=False)
        company_standard_list = pd.read_excel(reviewfilepath, sheet_name='3_No_Address_Company', sort=False)
        company_summary = vd.staging_summary('Company', company_raw_list, company_duplicate_list, company_existing_list, company_standard_list, company_load_list)
        db.load_staging(company_summary, list(company_summary), 'Summary', sourcename, timestamp)
        contact_validate_list = pd.read_excel(reviewfilepath, sheet_name='4_Validate_Contact', sort=False)
        contact_duplicate_list = contact_validate_list[contact_validate_list['vc_Deduplicate'] == False]
        contact_existing_list = contact_validate_list[contact_validate_list['db_New'] == False]
        contact_standard_list = contact_validate_list[contact_validate_list['Load'] == False]
        contact_summary = vd.staging_summary('Contact', contact_raw_list, contact_duplicate_list, contact_existing_list, contact_standard_list, contact_load_list)
        db.load_staging(contact_summary, list(contact_summary), 'Summary', sourcename, timestamp)
Beispiel #34
0
def all_disaster_data():
    """Serve every disaster record as a JSON response."""
    records = get_all()
    return jsonify(records)
Beispiel #35
0
def favourites():
    """Render the favourites page with every stored entry."""
    # Renamed local from `all` to avoid shadowing the builtin; the template
    # variable name `all` is part of the template contract and is kept.
    entries = db.get_all()
    return render_template("favs.html", all=entries)
def get_20_results():
    """Fetch a batch of results from the database (argument 20 — presumably a row limit)."""
    batch_size = 20
    return db.get_all(batch_size)
Beispiel #37
0
 def collection_get(self):
     """Return the collection restricted to the listed display fields."""
     display_fields = ['name', 'image', 'agency', 'description', 'date']
     return get_all(delim=display_fields)