def __init__(self, highonly=False, nohigh=False):
    self.g = nx.Graph()
    if highonly:
        jumps = get_all(g_highonly, {})
    elif nohigh:
        jumps = get_all(g_nohigh, {})
    else:
        jumps = get_all(g_all, {})
    self.g.add_edges_from(jumps)
def __init__(self, names=False, categories=[0], rigs=True, detail=-1):
    # This query gets inventable items. Categories is an array of category IDs. [0] is the
    # default and represents all categories. Expected categories: 6, 7, 8, 18, 22. If detail
    # is set it must be an itemid and that will be all the information that is returned.
    if len(categories) == 1 and categories[0] == 0:
        categories = [6, 7, 8, 18, 22]
    if detail != -1:
        self.inventable_items = [[detail]]
    elif rigs == False:
        self.inventable_items = get_all(
            g_inventable_no_rigs['sql'] % ','.join(['%s'] * len(categories)),
            tuple(categories))
    else:
        self.inventable_items = get_all(
            g_inventable_categories['sql'] % ','.join(['%s'] * len(categories)),
            tuple(categories))
    self.items = dict()
    self.names = names
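# Usage sketch for the constructor above (assumption: "Inventables" is a placeholder name, the
# enclosing class is not shown in this snippet). The '%s' placeholders are expanded to match the
# number of category IDs before the query runs, so each call parameterizes correctly:
#
#     inv_all  = Inventables(names=True)                  # categories=[0] expands to all five IDs
#     inv_some = Inventables(categories=[6], rigs=False)  # restrict to category 6, no-rigs query
#     inv_one  = Inventables(detail=11184)                # look up a single, hypothetical typeID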
def get_all_orders(self):
    self.data = db.get_all("orders", [
        "id", "weight", "region", "delivery_hours", "assigned", "completed"
    ])
    for order in self.data:
        order["delivery_hours"] = json.loads(order["delivery_hours"])
    self.to_internal_value()
def do_GET(self):
    o = urlsplit(self.path)
    # Only index
    if o.path != '/':
        self.send_error(404)
        return
    table_rows = []
    for i, x in reversed(list(enumerate(db.get_all(reversed=False), 1))):
        table_rows.append(f'''
            <tr>
                <td>{i}</td>
                <td>{x['date']}</td>
                <td>{str(x['cold']).zfill(5)}</td>
                <td>{str(x['hot']).zfill(5)}</td>
            </tr>
        ''')
    text = HTML_TEMPLATE_INDEX \
        .replace('{{ table_rows }}', ''.join(table_rows))
    self.send_response(200)
    self.send_header('Content-Type', 'text/html; charset=utf-8')
    self.send_header('Connection', 'close')
    self.end_headers()
    self.wfile.write(text.encode('utf-8'))
def request_things():
    uuid = request.form.get('uuid', False)
    category = request.form.get('category', False)
    item = request.form.get('item', False)
    email = request.form.get('email', False)
    try:
        image = request.files["image"]
    except KeyError:
        image = False
    ip = request.environ.get('HTTP_X_REAL_IP', request.remote_addr)
    print(ip)
    if uuid and category and item and email:
        location = None
        try:
            location = get_all()[uuid]['location']
        except KeyError:
            return "Error: disaster must be created before you can request things for it"
        # NOTE: `ip or ip == ...` is truthy for any non-empty address, so every client that
        # supplies an IP currently passes this check; the stated intent was only to let
        # localhost and a few known addresses bypass IP verification.
        if ip or ip == "76.112.42.21" or ip == "192.168.86.1" or ip == "192.168.86.41" or ip == "192.168.1.80":
            # allow localhost and my IP to bypass IP verification
            request_id = add_request(uuid, category, item, email, image)
            thread = Thread(target=nlp, kwargs={
                'name': item,
                'category': category,
                'id': uuid,
                'request_id': request_id
            })
            thread.start()
            return 'Added'
        else:
            return 'Error: IP address does not appear to be from the location of the disaster, please ensure you are not using a VPN', 527
    else:
        return "Error: not all data was received"
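# A minimal sketch of what the bypass branch above presumably intended, assuming the goal is to
# skip verification only for localhost and the hard-coded addresses (everything else would still
# need the real location check, which is not shown in the original):
#
#     BYPASS_IPS = {"127.0.0.1", "76.112.42.21", "192.168.86.1", "192.168.86.41", "192.168.1.80"}
#     if ip in BYPASS_IPS:
#         ...  # accept the request without location verification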
def post_index():
    username = request.form.get('username')
    password = request.form.get('password')
    user = {'username': username, 'password': password}
    if user in get_all():
        session['username'] = username
        return 'đã đăng nhập thành công'  # "logged in successfully"
    return redirect(url_for('get_index'))
def get_food():
    """Display the dishes that are currently available."""
    if "username" in session:
        return render_template('food.html', data=get_all())
    else:
        return redirect(url_for("login"))
def main():
    random_nonce = random_string(32)
    session['nonce'] = random_nonce
    return render_template("index.html", data=db.get_all(), zone=config.zone, nonce=random_nonce)
def reminder():
    user_data = db.get_all()
    lesson = {'07:50': 0, '09:25': 1, '10:55': 2, '12:55': 3, '14:25': 4}
    now = datetime.datetime.now().strftime('%H:%M')
    for user in user_data:
        schedule_rem = day_reminder(read_schedule(user))[lesson[now]]
        if schedule_rem == '':
            schedule_rem = f"{lesson[now]} - Пари немає"  # "No class" for this slot
        bot.send_message(user[0], schedule_rem)
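# The dict lookup above assumes the job fires at exactly one of the five listed times; at any
# other minute `lesson[now]` raises KeyError. A guarded variant (sketch, same structure):
#
#     slot = lesson.get(now)
#     if slot is None:
#         return  # not a lesson boundary, nothing to send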
def get_all(conn):
    try:
        response = db.get_all(conn, db.PLANTS_TABLE)
        for i in range(len(response)):
            response[i]['otherInfo'] = db.get_by_foreign_key(
                conn, db.OTHER_INFO_TABLE, 'plant_id', response[i]['id'])
    except Exception as e:
        return dao_utils.handle_db_error(e)
    return endpoint_utils.handle_get_finished_successfully(response)
def note_list():
    access = check_authorization(request)
    if access['valid']:
        conn = db.connect_database()
        notes = db.get_all(conn)
        return jsonify(notes), 200
    else:
        response = {'message': access['message']}
        return jsonify(response), 401
def _get_categories(self) -> List[Category]:
    """Returns a list of all the categories in the database."""
    categories = db.get_all(
        "categories",
        ["category_codename", "category_name", "is_base_expense", "aliases"]
    )
    res_categories = self._fill_aliases(categories)
    return res_categories
def post_food():
    """Add a dish."""
    food_name = request.form.get('name')
    food_price = request.form.get('price')
    food_image = request.form.get('image_url')
    add_food(food_name, food_price, food_image)
    return render_template('food.html', data=get_all())
def food(name: str):
    # uid = get_user()
    name = request.args.get('name')
    if name:
        food = db.get(collection="foods", resource_id=name)
        if food:
            return jsonify(food), 200
        # The original built a set literal here ({"error:", ...}); the error payload should be a dict.
        return jsonify({"error": f"{name} not found in database"}), 404
    else:
        return jsonify(db.get_all("foods")), 200
def pushStart():
    """Starts the pushing loop; preferably this is run in its own thread."""
    for row in db.get_all(QueueRow):
        q.put(row)
    threading.Thread(target=queuePusher).start()
    threading.Thread(target=runDoneQueue).start()
def list_command():
    dic_command = {}
    _, session = init_db()
    all_name = get_all(session)
    dic_command['name'] = []
    dic_command['filepath'] = []
    for command in all_name:
        dic_command['name'].append(command.command_name)
        dic_command['filepath'].append(command.filepath)
    print(tabulate(dic_command, headers='keys', tablefmt="pretty"))
def manage_accounts():
    if not current_user.is_superuser:
        return redirect('/')
    info = request.args.get('info', None)
    errs = []
    return render_template('auth/manage.html', users=get_all(User), errors=errs, info=info)
def approve_all(message=None):
    global reddit
    posts = db.get_all()
    if posts is not None:
        for entry in posts:
            submission = reddit.submission(entry.id)
            print(submission.title)
            if not check_post_deleted(submission):
                submission.mod.approve()
                if message is not None:
                    submission.author.message(entry.url, message)
            db.remove_entry(entry.id)
def datacores(self, item):
    typeid = item['typeID']
    item['datacores'] = []
    # (typeid) is not a tuple; the trailing comma is required for a one-element parameter tuple.
    datacores = get_all(g_datacores['sql'], (typeid,))
    for datacore in datacores:
        dc = {
            'typeID': datacore[g_datacores['typeID']],
            'quantity': datacore[g_datacores['quantity']]
        }
        if self.names:
            dc['typeName'] = datacore[g_datacores['typeName']]
        item['datacores'].append(dc)
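# Quick illustration of the one-element tuple pitfall fixed above (plain Python, no DB needed):
#
#     >>> typeid = 11184          # hypothetical typeID
#     >>> (typeid)                # just a parenthesized int
#     11184
#     >>> (typeid,)               # an actual 1-tuple, which DB-API parameter binding expects
#     (11184,)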
async def register_slave(websocket, wrapper):
    print("Registering slave...")
    # build agent info
    resources = []
    for resource in wrapper.register_slave.slave.resources:
        if resource.type == messages_pb2.Value.SCALAR:
            resources.append([resource.name, resource.type, resource.scalar.value])
        elif resource.type == messages_pb2.Value.SET:
            resources.append([resource.name, resource.type, resource.set.item])
        elif resource.type == messages_pb2.Value.RANGE:
            resources.append([resource.name, resource.type, resource.ranges.range])
        elif resource.type == messages_pb2.Value.TEXT:
            resources.append([resource.name, resource.type, resource.text.value])
    # build agent info
    attributes = []
    for resource in wrapper.register_slave.slave.attributes:
        if resource.type == messages_pb2.Value.SCALAR:
            attributes.append([resource.name, resource.type, resource.scalar.value])
        elif resource.type == messages_pb2.Value.SET:
            attributes.append([resource.name, resource.type, resource.set.item])
        elif resource.type == messages_pb2.Value.RANGE:
            attributes.append([resource.name, resource.type, resource.ranges.range])
        elif resource.type == messages_pb2.Value.TEXT:
            attributes.append([resource.name, resource.type, resource.text.value])
    # add agent to db
    agent_id = db.add_agent(resources, attributes, "webs")
    sockets[websocket] = agent_id
    print(db.get_all())
    # send response message
    response = messages_pb2.WrapperMessage()
    response.slave_registered.slave_id = str(agent_id)
    await websocket.send(response.SerializeToString())
def message(event):
    if event['message']['type'] != 'text':
        return
    text = event['message']['text'].split()
    reply_token = event['replyToken']
    if len(text) >= 3 and text[1] in ['->', '=', '==']:
        line_api.reply_message(reply_token, 'success')
        db.insert(text[0], text[2])
        return
    # text2 = text[0].split('=')
    # Note: the original character class [==] is equivalent to splitting on a single '='.
    text2 = re.split(r'=', text[0])
    if len(text2) == 2 and len(text) == 1:
        line_api.reply_message(reply_token, 'success')
        db.insert(text2[0], text2[1])
        return
    reply = db.get_all()
    if text[0] in reply:
        line_api.reply_message(reply_token, reply[text[0]])
        return
def do_GET(self):
    o = urlsplit(self.path)
    # Only index and ALLOW_LIST
    if o.path != '/' and o.path not in ALLOW_LIST:
        self.send_error(404)
        return
    if o.path in ALLOW_LIST:
        print('[o.path]', o.path)
        ext = get_ext(o.path)
        f = DIR / o.path.lstrip('/')
        data = f.read_bytes()
        self.send_response(200)
        self.send_header('Content-Type', MIME_BY_CONTENTYPE[ext])
        self.send_header('Content-length', len(data))
        self.end_headers()
        self.wfile.write(data)
        return
    table_rows = []
    for i, x in reversed(list(enumerate(db.get_all(reversed=False), 1))):
        table_rows.append(f'''
            <tr>
                <td>{i}</td>
                <td>{x['date']}</td>
                <td>{str(x['cold']).zfill(5)}</td>
                <td>{str(x['hot']).zfill(5)}</td>
            </tr>
        ''')
    text = HTML_TEMPLATE_INDEX \
        .replace('{{ table_rows }}', ''.join(table_rows))
    self.send_response(200)
    self.send_header('Content-Type', 'text/html; charset=utf-8')
    self.send_header('Connection', 'close')
    self.end_headers()
    self.wfile.write(text.encode('utf-8'))
def on_get(self, req, resp):
    list_of_services = []
    services = db.get_all()
    query_filter = req.params
    for k, v in services.iteritems():  # Python 2 dict iteration; use .items() on Python 3
        if not self.filter_data(query_filter, v):
            continue
        service_summary = {}
        service_summary["name"] = v.get("name")
        service_summary["serviceType"] = v.get("serviceType")
        service_summary["serviceState"] = v.get("serviceState")
        service_summary["id"] = v.get("id")
        service_summary["href"] = v.get("href")
        service_summary["createdBy"] = v.get("createdBy")
        service_summary["createdOn"] = v.get("createdOn")
        list_of_services.append(service_summary)
    resp.status = falcon.HTTP_200
    resp.body = success_response(list_of_services)
def request(data, type):
    # Python 2 / PyGTK code; print statements kept in the py2 form used throughout this module.
    print 'request starts!'
    if (type == 'db_add_or_update'):
        print "type: db_add_or_update"
        db.add_or_update_no_push(data)
        gtk.gdk.threads_enter()
        gui.notify(data)
        gtk.gdk.threads_leave()
    # elif (type == 'textMessage'):
    #     print "type: textMessage"
    #     msg = TextMessage(data.src, data.dst, data.msg)
    #     db.add_or_update(msg)
    #     msg.src_object = db.get_one_by_id(Employee, msg.src)
    #     msg.dst_object = db.get_one_by_id(Employee, msg.dst)
    #     db.commit()
    #     gtk.gdk.threads_enter()
    #     gui.notify(msg)
    #     gtk.gdk.threads_leave()
    elif (type == 'pong'):
        # Receives a list of IDs
        print 'type: pong'
        onlineLista = data
        # Updates who is online in the db
        print 'dbupdate to false'
        for mongo in db.get_all(Employee):
            mongo.online = False
        print 'dbupdate to true from list'
        for id in onlineLista:
            user = db.get_one_by_id(Employee, id)
            user.online = True
        db.commit()
def message(event):
    if event['message']['type'] != 'text':
        return
    text = event['message']['text'].split()
    reply_token = event['replyToken']
    if len(text) >= 3 and text[1] in ['->', '=', '==']:
        if re.match(r'https?://[\w/:%#\$&\?\(\)~\.=\+\-]+', text[0]):
            return
        db.insert(text[0], text[2])
        line_api.reply_message(reply_token, 'success')
        return
    # Note: the original character class [==] splits on a single '='; '==' yields an empty middle field.
    text2 = re.split(r'=', event['message']['text'])
    if len(text2) >= 2:
        if re.match(r'https?://[\w/:%#\$&\?\(\)~\.=\+\-]+', text2[0]):
            return
        db.insert(text2[0], text2[1])
        line_api.reply_message(reply_token, 'success')
        return
    reply = db.get_all()
    if text[0] in reply:
        line_api.reply_message(reply_token, reply[text[0]])
        return
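# Worked example of the '=' split used in both message handlers above (plain re, no bot needed):
#
#     >>> import re
#     >>> re.split(r'=', 'greeting=hello')
#     ['greeting', 'hello']
#     >>> re.split(r'=', 'greeting==hello')   # '==' produces an empty middle element
#     ['greeting', '', 'hello']
#
# which is presumably why the space-separated branch also accepts '->' and '==' as explicit separators.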
def perfect_materials(self, item):
    typeid = item['typeID']
    item['perfectMaterials'] = []
    materials = get_all(g_perfect_materials['sql'], (typeid, typeid, typeid))
    # This dict will be used to keep track of which materials have been seen. If a material
    # is listed in the regular list (affected by ME waste) and it appears in the extra
    # material list (not affected by ME waste), it must have PE waste applied to it.
    # This also requires the result set from the materials query to be sorted with normal
    # materials first followed by extra materials.
    requiredMaterials = dict()
    for material in materials:
        if material[g_perfect_materials['quantity']] > 0:
            typeid = material[g_perfect_materials['typeID']]
            wasteME = bool(material[g_perfect_materials['waste']])
            # PE always applies if ME waste applies.
            wastePE = wasteME
            if typeid in requiredMaterials:
                wastePE = True
            else:
                requiredMaterials[typeid] = True
            pm = {
                'typeID': typeid,
                'quantity': float(material[g_perfect_materials['quantity']]),
                'dmg': float(material[g_perfect_materials['dmg']]),
                'wasteME': wasteME,
                'wastePE': wastePE
            }
            if self.names:
                pm['name'] = material[g_perfect_materials['name']]
            item['perfectMaterials'].append(pm)
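# Trace of the waste-flag bookkeeping above, assuming a material (hypothetical typeID 34) that
# appears first in the normal list (waste=1) and again in the extra list (waste=0):
#
#     first row:  wasteME=True,  not yet seen  -> wastePE=True,  typeID 34 marked as seen
#     second row: wasteME=False, already seen  -> wastePE=True   (the extra-list copy still gets PE waste)
#
# A material that only ever appears in the extra list keeps wasteME=False and wastePE=False.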
def main():
    global reddit
    global has_bot_started
    print(f'{CLIENT_ID}')
    try:
        reddit = praw.Reddit(client_id=CLIENT_ID,
                             client_secret=CLIENT_SECRET,
                             user_agent=USER_AGENT,
                             username=USERNAME,
                             password=PASSWORD)
        print(f"Authenticated as {reddit.user.me()}")
    except Exception:
        # The original string was missing its f-prefix, so the filename was never interpolated.
        print(f'Invalid credentials, please enter correct credentials into {CREDENTIALS_FILENAME}')
        return
    try:
        subreddit = reddit.subreddit(SUBREDDIT)
        stream = subreddit.stream.submissions(pause_after=0)
        early_posts = set()
        db.initialize_database()
        approve_all()
        print('Approved remaining posts, waiting 5 seconds before startup:')
        time.sleep(5)
        # Ignore all older posts
        for submission in stream:
            if submission is None:
                break
        has_bot_started = True
        while True:
            ignored_posts = set()
            for submission in early_posts:
                if submission is None:
                    break
                updated_submission = reddit.submission(submission)
                if check_post_deleted(updated_submission):
                    ignored_posts.add(submission)
                    continue
                # Check if automoderator has commented
                automoderator = mod_in_comments(updated_submission, MODERATOR)
                if automoderator:
                    process_submission(reddit, updated_submission)
                    ignored_posts.add(submission)
            # Clear out ignored_posts
            for submission in ignored_posts:
                early_posts.remove(submission)
            # Check new submissions
            for submission in stream:
                if submission is None:
                    break
                if check_post_deleted(submission):
                    continue
                # Check if automoderator has commented
                automoderator = mod_in_comments(submission, MODERATOR)
                if not automoderator:
                    early_posts.add(submission.id)
                else:
                    process_submission(reddit, submission)
            # Check inbox
            for message in reddit.inbox.unread(limit=None):
                if isinstance(message, praw.models.Message):
                    print(f'Processing new message: {message.subject[4:]}')
                    # Mark as read so it won't do it again later
                    process_message(reddit, message)
                    message.mark_read()
            # Remove all old posts from database
            posts = db.get_all()
            if posts is not None:
                for entry in posts:
                    submission = reddit.submission(entry.id)
                    if check_post_deleted(submission):
                        db.remove_entry(entry.id)
                    elif hour_difference(submission.created_utc, time.time()) >= MAX_TIME:
                        db.remove_entry(entry.id)
                        reddit.redditor(entry.author).message(OLD_RESPONSE_MESSAGE)
    except Exception:
        print('An error has occurred, approving all and restarting...')
        try:
            approve_all()
        except Exception:
            print('Approving failed, closing bot and sending modmail describing the situation')
            reddit.subreddit(SUBREDDIT).message(UNEXPECTED_SHUTDOWN_MESSAGE)
            return
        time.sleep(60)
        main()
def run(phrase):
    # Deduplicate companies, find common companies and contacts
    if phrase == 'p1':
        print('Phrase 1: Deduplicate companies, find common companies and contacts.')
        company_raw_list = pd.read_excel(rawfilepath, sheet_name='Company', sort=False, dtype=str)
        contact_raw_list = pd.read_excel(rawfilepath, sheet_name='Contact', sort=False, dtype=str)
        company_init_list = vd.init_list(company_raw_list, company_colnames, 'Company')
        contact_init_list = vd.init_list(contact_raw_list, contact_colnames, 'Contact', sourcename, timestamp)
        company_common_list, contact_common_list = vd.validate_common(company_init_list, contact_init_list)
        company_duplicate_list, company_duplicate_full, company_common_list, contact_common_list = vd.dedup_company(company_common_list, contact_common_list)
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        backupwriter = pd.ExcelWriter(backupfilepath, engine='openpyxl')
        company_duplicate_list.to_excel(reviewwriter, index=False, header=True, columns=company_dup_colnames, sheet_name='1_Duplicate_Company')
        company_duplicate_full.to_excel(reviewwriter, index=False, header=True, columns=company_dup_colnames, sheet_name='1_Duplicate_Company_Full')
        company_common_list.to_excel(backupwriter, index=False, header=True, columns=company_colnames, sheet_name='company_common_list')
        contact_common_list.to_excel(backupwriter, index=False, header=True, columns=contact_colnames, sheet_name='contact_common_list')
        reviewwriter.save()
        reviewwriter.close()
        backupwriter.save()
        backupwriter.close()
        print('Check {}, {}, deduplicate companies need review. {} contains full list of duplicate companies.'.format(reviewfilepath, '1_Duplicate_Company', '1_Duplicate_Company_Full'))
        print('{} companies are duplicates in this load.'.format(len(company_duplicate_list)))
    # Merge deduplicate companies and format relative contacts
    elif phrase == 'p2':
        print('Phrase 2: Merge deduplicate companies and clean relative contacts.')
        company_common_list = pd.read_excel(backupfilepath, sheet_name='company_common_list', sort=False)
        contact_common_list = pd.read_excel(backupfilepath, sheet_name='contact_common_list', sort=False)
        company_duplicate_list = pd.read_excel(reviewfilepath, sheet_name='1_Duplicate_Company', sort=False)
        company_dedup_list, contact_format_list = vd.dedup_fix(company_common_list, contact_common_list, company_duplicate_list)
        company_db_return = db.get_all(company_load_colnames, 'Company')
        if company_db_return.empty:
            company_existing_list = company_db_return
        else:
            company_dedup_list, company_existing_list = vd.dedup_comany_db(company_dedup_list, company_db_return)
        company_dedup_list = vd.map_state(company_dedup_list)
        backupwriter = pd.ExcelWriter(backupfilepath, engine='openpyxl')
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        backupbook = load_workbook(backupwriter.path)
        reviewbook = load_workbook(reviewwriter.path)
        backupwriter.book = backupbook
        reviewwriter.book = reviewbook
        company_existing_list.to_excel(reviewwriter, index=False, header=True, columns=company_colnames, sheet_name='2_Existing_Company')
        company_dedup_list.to_excel(backupwriter, index=False, header=True, columns=list(company_dedup_list), sheet_name='company_dedup_list')
        contact_format_list.to_excel(backupwriter, index=False, header=True, columns=contact_colnames, sheet_name='contact_format_list')
        backupwriter.save()
        backupwriter.close()
        reviewwriter.save()
        reviewwriter.close()
        print('Check {}, {}.'.format(reviewfilepath, '2_Existing_Company'))
        print('{} companies already exist in the local staging table.'.format(len(company_existing_list)))
    # Run web scraper to enrich company details
    elif phrase == 'p3':
        print('Phrase 3: Run web scraper to enrich company details.')
        company_dedup_list = pd.read_excel(backupfilepath, sheet_name='company_dedup_list', sort=False)
        company_scrapy_return = qichacha(company_dedup_list[company_dedup_list['db_New'] != False], scrapyfilepath, 'company_scrapy_return')
        company_scrapy_return.to_excel(scrapyfilepath, index=False, header=True, columns=list(company_scrapy_return), sheet_name='company_scrapy_return')
    # Enrich companies with web scraper returns
    elif phrase == 'p4':
        print('Phrase 4: Enrich companies with web scraper returns')
        company_scrapy_return = pd.read_excel(scrapyfilepath, sheet_name='company_scrapy_return', sort=False)
        company_scrapy_return = vd.init_list(company_scrapy_return, list(company_scrapy_return))
        company_scrapy_return['Confidence'] = company_scrapy_return.apply(getConfidence, axis=1)
        company_scrapy_return['境外公司'] = company_scrapy_return['境外公司'].replace({0: False, 1: True})
        company_scrapy_return.to_excel(scrapyfilepath, index=False, header=True, columns=list(company_scrapy_return), sheet_name='company_scrapy_return')
        company_dedup_list = pd.read_excel(backupfilepath, sheet_name='company_dedup_list', sort=False)
        company_scrapy_list, company_scrapy_verify = vd.enrich_company(company_dedup_list, company_scrapy_return, company_colnames)
        backupwriter = pd.ExcelWriter(backupfilepath, engine='openpyxl')
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        backupbook = load_workbook(backupwriter.path)
        reviewbook = load_workbook(reviewwriter.path)
        backupwriter.book = backupbook
        reviewwriter.book = reviewbook
        company_scrapy_list.to_excel(backupwriter, index=False, header=True, columns=company_colnames, sheet_name='company_scrapy_list')
        company_scrapy_verify.to_excel(reviewwriter, index=False, header=True, columns=company_colnames, sheet_name='3_No_Address_Company')
        backupwriter.save()
        backupwriter.close()
        reviewwriter.save()
        reviewwriter.close()
        # db.load_staging(company_scrapy_return, list(company_scrapy_return), 'Scrapy', sourcename, timestamp)
        print('Check {}, {}, enrich companies without address.'.format(reviewfilepath, '3_No_Address_Company'))
        print('{} companies remain without an address.'.format(len(company_scrapy_verify)))
    # Enrich company with business return, validate contact
    elif phrase == 'p5':
        print('Phrase 5: Enrich company with business return, validate contact.')
        company_business_return = pd.read_excel(reviewfilepath, sheet_name='3_No_Address_Company', sort=False)
        company_scrapy_list = pd.read_excel(backupfilepath, sheet_name='company_scrapy_list', sort=False)
        contact_format_list = pd.read_excel(backupfilepath, sheet_name='contact_format_list', sort=False)
        company_load_list = vd.enrich_business(company_scrapy_list, company_business_return)
        contact_db_return = db.get_all(contact_load_colnames, 'Contact')
        if contact_db_return.empty:
            contact_dedup_list = contact_format_list
        else:
            contact_dedup_list = vd.dedup_contact_db(contact_format_list, contact_db_return)
        contact_validate_list = vd.validate_contacts(contact_dedup_list, contact_colnames, company_load_list)
        company_load_list = company_load_list[company_load_list['Load'] == True]
        db.load_staging(company_load_list, company_load_colnames, 'Company', sourcename, timestamp)
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        reviewbook = load_workbook(reviewwriter.path)
        reviewwriter.book = reviewbook
        contact_validate_list.to_excel(reviewwriter, index=False, header=True, columns=contact_colnames, sheet_name='4_Validate_Contact')
        company_load_list.to_excel(reviewwriter, index=False, header=True, columns=company_load_colnames, sheet_name='5_Company_Load')
        reviewwriter.save()
        reviewwriter.close()
        print('Check {}, {}, contacts need to review.'.format(reviewfilepath, '4_Validate_Contact'))
        print('{} companies load into staging table.'.format(len(company_load_list)))
        print('{} contacts need review'.format(len(contact_validate_list[contact_validate_list['Load'] == False])))
    # Enrich contacts with business return
    elif phrase == 'p6':
        contact_business_list = pd.read_excel(reviewfilepath, sheet_name='4_Validate_Contact', sort=False)
        contact_load_list = contact_business_list[contact_business_list['Load'] == True]
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        reviewbook = load_workbook(reviewwriter.path)
        reviewwriter.book = reviewbook
        contact_load_list.to_excel(reviewwriter, index=False, header=True, columns=contact_load_colnames, sheet_name='5_Contact_Load')
        reviewwriter.save()
        reviewwriter.close()
        db.load_staging(contact_load_list, contact_load_colnames, 'Contact', sourcename, timestamp)
        print('{} contacts load into staging table.'.format(len(contact_load_list[contact_load_list['Load'] != False])))
    # Cross-check and log merge, deletion, modification records
    elif phrase == 'p7':
        print('Cross-check and log merge, deletion, modification record.')
        company_raw_list = pd.read_excel(rawfilepath, sheet_name='Company', sort=False)
        contact_raw_list = pd.read_excel(rawfilepath, sheet_name='Contact', sort=False)
        contact_raw_list['Source_ID'] = list(range(1, (len(contact_raw_list) + 1)))
        contact_raw_list['Source_ID'] = contact_raw_list['Source_ID'].apply(lambda x: sourcename + '_' + timestamp + '_' + 'Contact' + '_' + str(x))
        company_load_list = pd.read_excel(reviewfilepath, sheet_name='5_Company_Load', sort=False)
        contact_load_list = pd.read_excel(reviewfilepath, sheet_name='5_Contact_Load', sort=False)
        company_logs = vd.staging_log(company_raw_list, company_load_list, 'Company', logs_columns)
        db.load_staging(company_logs, logs_columns, 'Logs', sourcename, timestamp)
        contact_logs = vd.staging_log(contact_raw_list, contact_load_list, 'Contact', logs_columns)
        db.load_staging(contact_logs, logs_columns, 'Logs', sourcename, timestamp)
        company_duplicate_list = pd.read_excel(reviewfilepath, sheet_name='1_Duplicate_Company_Full', sort=False)
        company_existing_list = pd.read_excel(reviewfilepath, sheet_name='2_Existing_Company', sort=False)
        company_standard_list = pd.read_excel(reviewfilepath, sheet_name='3_No_Address_Company', sort=False)
        company_summary = vd.staging_summary('Company', company_raw_list, company_duplicate_list, company_existing_list, company_standard_list, company_load_list)
        db.load_staging(company_summary, list(company_summary), 'Summary', sourcename, timestamp)
        contact_validate_list = pd.read_excel(reviewfilepath, sheet_name='4_Validate_Contact', sort=False)
        contact_duplicate_list = contact_validate_list[contact_validate_list['vc_Deduplicate'] == False]
        contact_existing_list = contact_validate_list[contact_validate_list['db_New'] == False]
        contact_standard_list = contact_validate_list[contact_validate_list['Load'] == False]
        contact_summary = vd.staging_summary('Contact', contact_raw_list, contact_duplicate_list, contact_existing_list, contact_standard_list, contact_load_list)
        db.load_staging(contact_summary, list(contact_summary), 'Summary', sourcename, timestamp)
# compare and generate diff dict.
new = fo.get_all_file(ROOT + "/new_version")
old = fo.get_all_file(ROOT + "/old_version")
basic_diff_list = fo.basic_diff(new, old)
all_diff_list = fo.basic_diff(new, {})
fo.all_to_temp(all_diff_list)
db.update_all_index(all_diff_list)
# sync db
db.pub_new(latest_version)
db.update_index(latest_version, basic_diff_list)
QuerySet = db.get_all()
for Query in QuerySet:
    if Query['pathindex'] != "[]":
        try:
            os.mkdir("version_" + Query["version"])
        except OSError:
            pass
# generate & copy files
print QuerySet
fo.make_file_from_db(QuerySet)
fo.update_all_version(QuerySet)
def get(self, lang):
    doc = db.get_all(lang)
    if doc is None:
        return None, 404
    return Response(json.dumps(doc), mimetype='application/json')
def show():
    all_data = db.get_all()
    print(all_data)

# def display(weather, restaurant, events):
def run(phrase):
    # Deduplicate companies, find common companies and contacts
    if phrase == 'p1':
        print('Phrase 1: Deduplicate companies, find common companies and contacts.')
        company_raw_list = pd.read_excel(rawfilepath, sheet_name='Company', sort=False, dtype=str)
        contact_raw_list = pd.read_excel(rawfilepath, sheet_name='Contact', sort=False, dtype=str)
        # Initialization
        company_init_list = vd.init_list(company_raw_list, company_colnames, mode='Company')
        contact_init_list = vd.init_list(contact_raw_list, contact_colnames, mode='Contact', sourcename=sourcename, timestamp=timestamp, company=company_init_list)
        # Deduplication within source data
        company_common_list, contact_common_list = vd.validate_common(company_init_list, contact_init_list)
        # Map state abbreviation and enrich state
        company_common_list = vd.map_state(company_common_list)
        company_duplicate_list, company_duplicate_full, company_dedup_list, contact_common_list = vd.dedup_company(company_common_list, contact_common_list)
        # Deduplication against staging table
        company_db_return = db.get_all(company_load_colnames, 'Company')
        if company_db_return.empty:
            company_existing_list = company_db_return
        else:
            company_dedup_list, company_existing_list = vd.dedup_comany_db(company_dedup_list, company_db_return)
        # Keep companies that are not duplicates
        # company_dedup_list = company_dedup_list[company_dedup_list['Load'] == True]
        print('Check {}, {}, deduplicate companies need review. {} contains full list of duplicate companies.'.format(reviewfilepath, '1_Duplicate_Company', '1_Duplicate_Company_Full'))
        print('{} companies are duplicates in this load.'.format(len(company_duplicate_list)))
        print('Check {}, {}.'.format(reviewfilepath, '2_Existing_Company'))
        print('{} companies already exist in the local staging table.'.format(len(company_existing_list)))
        backupwriter = pd.ExcelWriter(backupfilepath, engine='openpyxl')
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        company_duplicate_list.sort_values(by=['ComName_temp']).to_excel(reviewwriter, index=False, header=True, columns=company_dup_colnames, sheet_name='1_Duplicate_Company')
        company_duplicate_full.sort_values(by=['ComName_temp']).to_excel(reviewwriter, index=False, header=True, columns=company_dup_colnames, sheet_name='1_Duplicate_Company_Full')
        company_existing_list.sort_values(by=['ComName_temp']).to_excel(reviewwriter, index=False, header=True, columns=list(company_existing_list), sheet_name='2_Existing_Company')
        company_dedup_list.to_excel(backupwriter, index=False, header=True, columns=company_colnames, sheet_name='company_dedup_list')
        contact_common_list.to_excel(backupwriter, index=False, header=True, columns=contact_colnames, sheet_name='contact_common_list')
        backupwriter.save()
        backupwriter.close()
        reviewwriter.save()
        reviewwriter.close()
    # Run web scraper to enrich company details
    elif phrase == 'p2':
        print('Phrase 2: Run web scraper to enrich company details.')
        company_dedup_list = pd.read_excel(backupfilepath, sheet_name='company_dedup_list', sort=False)
        # company_dedup_list = company_dedup_list[company_dedup_list['db_New'] != False]
        company_dedup_list = company_dedup_list[(company_dedup_list['db_New'] != False) & (company_dedup_list['Load'] == True) & pd.isnull(company_dedup_list['Billing_Address']) & pd.isnull(company_dedup_list['Billing_Address_CN'])]
        company_scrapy_return = qichacha(company_dedup_list, scrapyfilepath, 'company_scrapy_return')
        company_scrapy_return.to_excel(scrapyfilepath, index=False, header=True, columns=list(company_scrapy_return), sheet_name='company_scrapy_return')
    # Enrich companies with web scraper returns, validate contact
    elif phrase == 'p3':
        print('Phrase 3: Enrich companies with web scraper returns')
        company_scrapy_return = pd.read_excel(scrapyfilepath, sheet_name='company_scrapy_return', sort=False)
        company_scrapy_return = vd.init_list(company_scrapy_return, list(company_scrapy_return), mode='')
        company_scrapy_return['Confidence'] = company_scrapy_return.apply(getConfidence, axis=1)
        company_scrapy_return['境外公司'] = company_scrapy_return['境外公司'].replace({0: False, 1: True})
        company_scrapy_return.to_excel(scrapyfilepath, index=False, header=True, columns=list(company_scrapy_return), sheet_name='company_scrapy_return')
        company_dedup_list = pd.read_excel(backupfilepath, sheet_name='company_dedup_list', sort=False)
        company_scrapy_list, company_scrapy_verify = vd.enrich_company(company_dedup_list, company_scrapy_return, company_colnames)
        company_scrapy_verify = company_scrapy_verify[(company_scrapy_verify['vc_Deduplicate'] == True) & (company_scrapy_verify['db_New'] == True)]
        print('Check {}, {}, enrich companies without address.'.format(reviewfilepath, '3_No_Address_Company'))
        print('{} companies remain without an address.'.format(len(company_scrapy_verify)))
        print('Phrase 4: Validate contact.')
        # Validate contact
        contact_common_list = pd.read_excel(backupfilepath, sheet_name='contact_common_list', sort=False)
        contact_db_return = db.get_all(contact_load_colnames, 'Contact')
        if contact_db_return.empty:
            contact_dedup_list = contact_common_list
        else:
            contact_dedup_list = vd.dedup_contact_db(contact_common_list, contact_db_return)
        contact_validate_list = vd.validate_contacts(contact_dedup_list, contact_colnames, company_scrapy_list)
        contact_review_list = contact_validate_list[contact_validate_list['Load'] == False]
        contact_validate_list = contact_validate_list[contact_validate_list['Load'] == True]
        backupwriter = pd.ExcelWriter(backupfilepath, engine='openpyxl')
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        backupbook = load_workbook(backupwriter.path)
        reviewbook = load_workbook(reviewwriter.path)
        backupwriter.book = backupbook
        reviewwriter.book = reviewbook
        company_scrapy_list.to_excel(backupwriter, index=False, header=True, columns=company_colnames, sheet_name='company_scrapy_list')
        company_scrapy_verify.sort_values(by=['ComName_temp']).to_excel(reviewwriter, index=False, header=True, columns=company_colnames, sheet_name='3_No_Address_Company')
        contact_validate_list.sort_values(by=['First_Name', 'Last_Name', 'First_Name_CN', 'Last_Name_CN']).to_excel(backupwriter, index=False, header=True, columns=contact_load_colnames, sheet_name='contact_validate_list')
        contact_review_list.to_excel(reviewwriter, index=False, header=True, columns=contact_colnames, sheet_name='4_Validate_Contact')
        backupwriter.save()
        backupwriter.close()
        reviewwriter.save()
        reviewwriter.close()
        print('Check {}, {}, contacts need to review.'.format(reviewfilepath, '4_Validate_Contact'))
        print('{} contacts need review'.format(len(contact_review_list)))
    # Enrich companies, contacts with business return, load company and contact into staging table
    elif phrase == 'p4':
        print('Phrase 5: Enrich companies')
        company_duplicate_review = pd.read_excel(reviewfilepath, sheet_name='1_Duplicate_Company', sort=False)
        company_existing_review = pd.read_excel(reviewfilepath, sheet_name='2_Existing_Company', sort=False)
        company_address_review = pd.read_excel(reviewfilepath, sheet_name='3_No_Address_Company', sort=False)
        company_scrapy_list = pd.read_excel(backupfilepath, sheet_name='company_scrapy_list', sort=False)
        # Enrich companies from business review
        company_load_list = vd.enrich_business(company_scrapy_list, company_duplicate_review)
        company_load_list = vd.enrich_business(company_load_list, company_existing_review)
        # Enrich companies without address
        company_load_list = vd.enrich_no_address(company_load_list, company_address_review)
        company_load_list = vd.enrich_business(company_load_list, company_address_review)
        company_min_drop = company_address_review[company_address_review['Load'] == False]
        print('Check {}, {}, companies cannot meet minimum standard.'.format(reviewfilepath, '5_Company_Drop'))
        print('{} companies are dropped'.format(len(company_min_drop)))
        # Merge deduplicate companies and format relative contacts
        print('Phrase 6: Merge deduplicate companies and clean relative contacts. Enrich contacts with business return.')
        contact_validate_review = pd.read_excel(reviewfilepath, sheet_name='4_Validate_Contact', sort=False)
        contact_validate_list = pd.read_excel(backupfilepath, sheet_name='contact_validate_list', sort=False)
        contact_load_list = vd.enrich_business(contact_validate_list, contact_validate_review)
        temp, contact_load_list = vd.dedup_fix(company_load_list, contact_load_list, company_duplicate_review)
        temp, contact_load_list = vd.dedup_fix(company_load_list, contact_load_list, company_existing_review)
        contact_load_list = vd.enrich_contact(company_load_list, contact_load_list, company_load_colnames)
        # contact_load_list = contact_load_list[contact_load_list['Load'] != False]
        contact_load_list.loc[~contact_load_list['Source_Company_ID'].isin(company_load_list['Source_ID'].tolist()), 'Load'] = False
        contact_no_company = contact_load_list[~contact_load_list['Source_Company_ID'].isin(company_load_list['Source_ID'].tolist())]
        contact_no_company['Reject_Reason'] = 'No company; '
        contact_min_list = contact_validate_list[contact_validate_list['Load'] == False]
        contact_min_list = contact_min_list.append(contact_no_company)
        print('Check {}, {}, contacts cannot meet minimum standard.'.format(reviewfilepath, '5_Contact_Drop'))
        print('{} contacts are dropped, because companies are dropped.'.format(len(contact_min_list)))
        # Load company and contact into staging table
        print('Phrase 7: Load company and contact into staging table.')
        db.load_staging(company_load_list, company_load_colnames, 'Company', sourcename, timestamp)
        db.load_staging(contact_load_list[contact_load_list['Load'] != False], contact_load_colnames, 'Contact', sourcename, timestamp)
        print('{} companies load into staging table.'.format(len(company_load_list)))
        print('{} contacts load into staging table.'.format(len(contact_load_list)))
        print('Phrase 8: Cross-check and log merge, deletion, modification record.')
        # Loading logs
        company_raw_list = pd.read_excel(rawfilepath, sheet_name='Company', sort=False)
        contact_raw_list = pd.read_excel(rawfilepath, sheet_name='Contact', sort=False)
        contact_raw_list['Source_ID'] = list(range(1, (len(contact_raw_list) + 1)))
        contact_raw_list['Source_ID'] = contact_raw_list['Source_ID'].apply(lambda x: sourcename + '_' + timestamp + '_' + 'Contact' + '_' + str(x))
        company_logs = vd.staging_log(company_raw_list, company_load_list, 'Company', logs_columns)
        db.load_staging(company_logs, logs_columns, 'Logs', sourcename, timestamp)
        contact_logs = vd.staging_log(contact_raw_list, contact_load_list, 'Contact', logs_columns)
        db.load_staging(contact_logs, logs_columns, 'Logs', sourcename, timestamp)
        # Loading summary
        company_duplicate_list = pd.read_excel(reviewfilepath, sheet_name='1_Duplicate_Company_Full', sort=False)
        company_existing_list = pd.read_excel(reviewfilepath, sheet_name='2_Existing_Company', sort=False)
        company_standard_list = pd.read_excel(reviewfilepath, sheet_name='3_No_Address_Company', sort=False)
        company_summary = vd.staging_summary('Company', company_raw_list, company_duplicate_list, company_existing_list, company_standard_list, company_load_list)
        db.load_staging(company_summary, list(company_summary), 'Summary', sourcename, timestamp)
        contact_validate_list = pd.read_excel(reviewfilepath, sheet_name='4_Validate_Contact', sort=False)
        contact_duplicate_list = contact_validate_list[contact_validate_list['vc_Deduplicate'] == False]
        contact_existing_list = contact_validate_list[contact_validate_list['db_New'] == False]
        contact_standard_list = contact_validate_list[contact_validate_list['Load'] == False]
        contact_summary = vd.staging_summary('Contact', contact_raw_list, contact_duplicate_list, contact_existing_list, contact_standard_list, contact_load_list)
        db.load_staging(contact_summary, list(contact_summary), 'Summary', sourcename, timestamp)
        # db.load_staging(company_scrapy_return, list(company_scrapy_return), 'Scrapy', sourcename, timestamp)
        reviewwriter = pd.ExcelWriter(reviewfilepath, engine='openpyxl')
        reviewbook = load_workbook(reviewwriter.path)
        reviewwriter.book = reviewbook
        company_min_drop.to_excel(reviewwriter, index=False, header=True, columns=company_load_colnames, sheet_name='5_Company_Drop')
        contact_min_list.to_excel(reviewwriter, index=False, header=True, columns=company_load_colnames, sheet_name='5_Contact_Drop')
        company_load_list.to_excel(reviewwriter, index=False, header=True, columns=company_load_colnames, sheet_name='6_Company_Load')
        contact_load_list.to_excel(reviewwriter, index=False, header=True, columns=contact_load_colnames, sheet_name='6_Contact_Load')
        reviewwriter.save()
        reviewwriter.close()
        print('---------- Done. ----------')
    elif phrase == 'Reload':
        company_load_list = pd.read_excel(reviewfilepath, sheet_name='6_Company_Load', sort=False)
        contact_load_list = pd.read_excel(reviewfilepath, sheet_name='6_Contact_Load', sort=False)
        db.load_staging(company_load_list, company_load_colnames, 'Company', sourcename, timestamp)
        db.load_staging(contact_load_list, contact_load_colnames, 'Contact', sourcename, timestamp)
        # Loading logs
        company_raw_list = pd.read_excel(rawfilepath, sheet_name='Company', sort=False)
        contact_raw_list = pd.read_excel(rawfilepath, sheet_name='Contact', sort=False)
        contact_raw_list['Source_ID'] = list(range(1, (len(contact_raw_list) + 1)))
        contact_raw_list['Source_ID'] = contact_raw_list['Source_ID'].apply(lambda x: sourcename + '_' + timestamp + '_' + 'Contact' + '_' + str(x))
        company_logs = vd.staging_log(company_raw_list, company_load_list, 'Company', logs_columns)
        db.load_staging(company_logs, logs_columns, 'Logs', sourcename, timestamp)
        contact_logs = vd.staging_log(contact_raw_list, contact_load_list, 'Contact', logs_columns)
        db.load_staging(contact_logs, logs_columns, 'Logs', sourcename, timestamp)
        # Loading summary
        company_duplicate_list = pd.read_excel(reviewfilepath, sheet_name='1_Duplicate_Company_Full', sort=False)
        company_existing_list = pd.read_excel(reviewfilepath, sheet_name='2_Existing_Company', sort=False)
        company_standard_list = pd.read_excel(reviewfilepath, sheet_name='3_No_Address_Company', sort=False)
        company_summary = vd.staging_summary('Company', company_raw_list, company_duplicate_list, company_existing_list, company_standard_list, company_load_list)
        db.load_staging(company_summary, list(company_summary), 'Summary', sourcename, timestamp)
        contact_validate_list = pd.read_excel(reviewfilepath, sheet_name='4_Validate_Contact', sort=False)
        contact_duplicate_list = contact_validate_list[contact_validate_list['vc_Deduplicate'] == False]
        contact_existing_list = contact_validate_list[contact_validate_list['db_New'] == False]
        contact_standard_list = contact_validate_list[contact_validate_list['Load'] == False]
        contact_summary = vd.staging_summary('Contact', contact_raw_list, contact_duplicate_list, contact_existing_list, contact_standard_list, contact_load_list)
        db.load_staging(contact_summary, list(contact_summary), 'Summary', sourcename, timestamp)
def all_disaster_data():
    return jsonify(get_all())
def favourites():
    all = db.get_all()  # note: shadows the built-in all() within this view
    return render_template("favs.html", all=all)
def get_20_results():
    return db.get_all(20)
def collection_get(self):
    return get_all(delim=['name', 'image', 'agency', 'description', 'date'])