def main():
    """Write one sample entry per logging method to ``test.log``.

    Creates a ``Logger`` for ``dir_log + "test.log"``, prints the value of its
    ``_timestamp()`` helper, and then emits one message through ``_log``,
    ``info``, ``error`` and ``warning`` in that order.
    """
    logger = Logger(dir_log + "test.log")
    print(logger._timestamp())

    # (method name, message) pairs, emitted in the listed order.
    samples = (
        ('_log', '일반적인 설명문장을 로그에 기록 합니다.'),
        ('info', '정보에 관한 로그문장을 로그에 기록 합니다.'),
        ('error', '에러발생시 에러메시지를 로그에 기록 합니다.'),
        ('warning', '경고 발생시 메시지를 로그에 기록 합니다.'),
    )
    for method_name, message in samples:
        getattr(logger, method_name)(message)
class Server(object):
    """Blocking TCP server that routes each client's first packet.

    Every accepted connection is read once (up to 512 bytes), the packet is
    handed to the data handler according to its first character, and the
    connection is closed again.
    """

    def __init__(self, config):
        """Store the server settings and create the data handler.

        :param config: mapping with ``type`` and ``port`` keys; ``max_penguins``
            is read only when ``type`` equals ``"world"``, otherwise the limit
            is 150. A falsy ``config`` logs a warning and exits the process.
        """
        self.logger = Logger("Server")
        if not config:
            self.logger.warning("Unable to find server configuration")
            sys.exit()
        self.type = config["type"]
        self.port = config["port"]
        # Compare by value: ``is`` tests object identity, which is not
        # guaranteed for equal strings that come from a parsed configuration.
        if self.type == "world":
            self.max_penguins = config["max_penguins"]
        else:
            self.max_penguins = 150
        self.penguins = []
        self.data_handler = DataHandler(self)

    def start_server(self):
        """Listen on 127.0.0.1:``self.port`` and serve clients forever.

        The listening socket is closed when the loop is left through an
        exception. A failure while reading one client is logged and does not
        stop the server.
        """
        server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            server.bind(("127.0.0.1", self.port))
            self.logger.info("{0} server listening on port {1}".format(self.type, self.port))
            server.listen(10000)
            while True:
                client, addr = server.accept()
                if len(self.penguins) >= self.max_penguins:
                    # Server is full: drop the connection and keep accepting
                    # instead of reading from the socket that was just closed.
                    client.close()
                    continue
                self.logger.debug("new connection from {}".format(addr))
                penguin = Penguin(self, client)
                try:
                    data = self._read_packet(client)
                    if data is not None:
                        self._dispatch(penguin, data)
                finally:
                    client.close()
        finally:
            server.close()

    def _read_packet(self, client):
        """Return the client's first packet as text, or None if it is unreadable."""
        try:
            return client.recv(512).decode("utf-8")
        except (socket.error, UnicodeDecodeError) as error:
            # One misbehaving client must not terminate the accept loop.
            self.logger.warning("unable to read packet - {}".format(error))
            return None

    def _dispatch(self, penguin, data):
        """Route ``data`` to the XML or RAW handler based on its first character."""
        self.logger.debug(data)
        if not data:
            # recv() returned nothing (peer closed without sending); there is
            # no first character to inspect.
            self.logger.debug("received empty packet")
            return
        if data[0] == "<":
            self.logger.debug("received XML packet - {}".format(data))
            self.data_handler.handle_xml(penguin, data)
        elif data[0] == "%":
            self.logger.debug("received RAW packet - {}".format(data))
            self.data_handler.handle_raw(penguin, data)
        else:
            self.logger.debug("received rogue packet - {}".format(data))
def write_out_file(messages, outputfileloc, logger, filename='out.txt'):
    """Write one line per entry of ``messages`` into a dated output folder.

    The target folder is ``<os.sep>/<outputfileloc>/<YYYYMMDD>`` for today's
    date; it is prepared with ``createdir`` and the file is overwritten if it
    already exists.

    :param list messages: entries to write, each formatted with ``%s``
    :param string outputfileloc: folder location to write the stats
    :param Logger logger: a logger instance
    :param string filename: filename to write stats on outputfileloc
    :returns None:
    """
    date_dir = datetime.today().strftime('%Y%m%d')
    path_of_outfile = os.path.join(os.sep, outputfileloc, date_dir)
    createdir(path_of_outfile)
    outfile = os.path.join(path_of_outfile, filename)
    with open(outfile, 'w') as f:
        # Write the ``messages`` argument itself; the function must not depend
        # on a module-level variable that only exists when run as a script.
        f.writelines("%s\n" % stat for stat in messages)
    logger.info("Stats is written in out file.")


if __name__ == '__main__':
    # Script entry point: fetch all SQS messages, compute their stats and
    # write the stats to today's output folder.
    logger = Logger().get_logger()
    logger.info('Starting the sqs client..')
    client = boto3.client('sqs', endpoint_url=EndPointUrl)
    messages = get_all_messages(client, QueueUrl, logger)
    messages_stats = get_stats(messages, logger)
    write_out_file(messages_stats, outputfileloc, logger)
class CyBot:
    """Selenium-driven bot that logs in to Cyworld and downloads timeline images.

    ``init``, ``login`` and ``home`` return ``self`` on success so they can be
    chained; on failure they log an error and call ``exit()``.
    """

    def __init__(self, chromedriver, wait=5, delay=3):
        """Start a Chrome driver.

        :param chromedriver: value passed to ``webdriver.Chrome``
        :param wait: seconds used for the implicit wait and for ``WebDriverWait``
        :param delay: seconds to sleep before each "more" click in ``feeder``
        """
        self._logger = Logger('cybot.log')
        self._logger.info('크롬 드라이버 로딩 중..')
        driver = webdriver.Chrome(chromedriver)
        driver.implicitly_wait(wait)
        self._logger.info('크롬 드라이버 로딩 완료')

        self._chromedriver = chromedriver
        self._base_url = 'https://cy.cyworld.com'
        self._user_id = ''
        self._wait_time = wait
        self._delay = delay
        self._driver = driver
        self._wait = WebDriverWait(driver, wait)

    def init(self):
        """Open the Cyworld home page and return ``self``."""
        self._logger.info('싸이월드 홈페이지 접속 중..')
        # Open the Cyworld page.
        self._driver.get('https://cyworld.com')
        self._logger.success('싸이월드 홈페이지 접속 완료')
        return self

    def login(self, user_email, user_password):
        """Submit the login form and verify that the timeline page is reached.

        Returns ``self`` when the resulting URL contains ``timeline``;
        otherwise logs an error and calls ``exit()``.
        """
        self._logger.info('로그인 시도 중..')
        prev_url = self._driver.current_url

        self._driver.find_element_by_name('email').send_keys(user_email)
        self._driver.find_element_by_name('passwd').send_keys(user_password, Keys.RETURN)

        try:
            # Wait until either the URL changes or the dialog is gone.
            self._wait.until(EC_or(
                EC.url_changes(prev_url),
                EC.invisibility_of_element(
                    (By.CSS_SELECTOR, '.ui-dialog.ui-widget.ui-widget-content.ui-corner-all.ui-front.ui-draggable.ui-resizable'))
            ))
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C and
            # SystemExit are not reported as a timeout.
            self._logger.error('시간이 초과되었습니다')
            exit()

        url = self._driver.current_url
        if 'timeline' in url:
            self._logger.success('로그인 성공')
            return self
        else:
            self._logger.error('사용자 정보를 확인해주세요')
            exit()

    def home(self):
        """Navigate to the user's home page and remember the user id.

        The user id is the last path segment of the ``a.freak1`` link.
        """
        self._logger.info('마이 홈으로 이동 중..')
        prev_url = self._driver.current_url

        # Extract the user's unique number.
        profile = self._driver.find_element_by_css_selector('a.freak1')
        self._user_id = profile.get_attribute('href').split('/').pop()

        # Click the profile picture area.
        self._driver.find_element_by_id('imggnbuser').click()

        try:
            self._wait.until(EC.url_changes(prev_url))
        except Exception:
            self._logger.error('시간이 초과되었습니다')
            exit()

        if 'home' not in self._driver.current_url:
            self._logger.error('마이 홈으로 이동할 수 없습니다')
            exit()

        self._logger.success('이동 완료')
        return self

    def feeder(self, content_list, running):
        """Collect post URLs from the timeline and append them to ``content_list``.

        Keeps clicking the "more" button until it can no longer be clicked.
        ``running.value`` is set to 0 and the driver window is closed when the
        method finishes, including when it fails part-way.

        :param content_list: list-like object receiving the post layer URLs
        :param running: shared value; set to 0 when feeding stops
        """
        content_index = 0

        try:
            # Extract every timeline content area.
            while self._driver.find_element_by_css_selector('p.btn_list_more'):
                contents = self._driver.find_elements_by_css_selector(
                    'input[name="contentID[]"]'
                )[content_index:]

                for content in contents:
                    cid = content.get_attribute('value')
                    content_url = '{}/home/{}/post/{}/layer' \
                        .format(self._base_url, self._user_id, cid)
                    self._logger.info('Feeder::', content_url)
                    content_list.append(content_url)
                    content_index += 1

                # Wait for the "more" button.
                try:
                    next_button = self._wait.until(
                        EC.element_to_be_clickable(
                            (By.CSS_SELECTOR, 'p.btn_list_more'))
                    )
                except Exception:
                    # The button never became clickable: treat this as the
                    # last page instead of using an unbound or stale button.
                    break
                time.sleep(self._delay)

                # Stop when the "more" button cannot be clicked (last page).
                if not (next_button.is_displayed() and next_button.is_enabled()):
                    break

                # Click the next button.
                next_button.click()
        finally:
            # Always signal the end of feeding, even on an error.
            running.value = 0
            self._driver.close()

        self._logger.info('Feeder:: 종료')

    def run(self, parser=2, downloader=2):
        """Run the feeder together with parser and downloader processes.

        :param parser: number of parser processes to start
        :param downloader: number of downloader processes to start
        """
        self._logger.info('이미지 다운로드 작업 시작')
        start = time.time()

        # Manager for multiprocessing.
        with Manager() as manager:
            # Process list.
            processes = []

            # Variables shared between the processes.
            content_list = manager.list()
            image_list = manager.list()
            count = manager.Value('i', 0)
            lock = manager.Lock()
            feeder_running = manager.Value('i', 1)
            parser_running = manager.Value('i', 1)

            parser_logger = Logger('cybot_parser.log')
            downloader_logger = Logger('cybot_downloader.log')

            # Only the name/value pair of each driver cookie is passed on.
            main_cookies = self._driver.get_cookies()
            cookie = [{'name': c['name'], 'value': c['value']}
                      for c in main_cookies]

            # Create and start the parser processes.
            for idx in range(parser):
                parser_instance = Parser(
                    self._chromedriver,
                    cookie,
                    parser_logger,
                    self._wait_time,
                    self._delay
                )
                parser_process = Process(
                    target=parser_instance.parse,
                    args=(
                        content_list,
                        image_list,
                        feeder_running,
                        parser_running
                    )
                )
                parser_process.name = 'Parser::' + str(idx)
                parser_process.start()
                processes.append(parser_process)
                self._logger.info('Parser', str(idx), '프로세스 시작')

            # Create and start the downloader processes.
            for idx in range(downloader):
                downloader_instance = Downloader(downloader_logger)
                downloader_process = Process(
                    target=downloader_instance.download,
                    args=(image_list, count, lock, parser_running))
                downloader_process.name = 'Downloader::' + str(idx)
                downloader_process.start()
                processes.append(downloader_process)
                self._logger.info('Downloader', str(idx), '프로세스 시작')

            # The feeder runs in this process.
            self._logger.info('Feeder 시작')
            self.feeder(content_list, feeder_running)

            # Wait for parser and downloader processes that are still running.
            for p in processes:
                p.join()

            self._logger.info('작업 소요시간: {}초'
                              .format(round(time.time() - start, 2)))
            self._logger.info('전체 이미지 수: {}'.format(count.value))
class CyBot:
    """Selenium-driven bot that logs in to Cyworld and downloads timeline images.

    ``init``, ``login`` and ``home`` return ``self`` on success so they can be
    chained. On failure they log an error, invoke the ``onerror`` callback and
    return ``None``.
    """

    __VERSION__ = '1.0.2'

    def __init__(self, chromedriver, wait=5, delay=3,
                 headless=False, onlog=None, onerror=exit, done=exit):
        """Store the settings; the driver itself is created by ``init``.

        :param chromedriver: value passed to ``webdriver.Chrome``
        :param wait: seconds used for the implicit wait and for ``WebDriverWait``
        :param delay: seconds to sleep before each "more" click in ``feeder``
        :param headless: when true, Chrome is started with headless options
        :param onlog: optional callable receiving progress messages
        :param onerror: callable invoked without arguments after a failure
        :param done: callable invoked without arguments when ``run`` finishes
        """
        self._logger = Logger('cybot.log', callback=onlog)
        self._chromedriver = chromedriver
        self._base_url = 'https://cy.cyworld.com'
        self._user_id = ''
        self._wait_time = wait
        self._delay = delay
        self._headless = headless
        self._onlog = onlog
        self._onerror = onerror
        self._done = done
        self._options = None
        self._driver = None
        self._wait = None

    def init(self):
        """Start Chrome and open the Cyworld home page.

        Returns ``self`` on success, ``None`` when the driver cannot be started.
        """
        self._logger.info('크롬 드라이버 로딩 중..')

        try:
            options = webdriver.ChromeOptions()
            if self._headless:
                options.add_argument('headless')
                options.add_argument('window-size=800x600')
                options.add_argument("disable-gpu")
            options.add_argument('log-level=3')
            options.add_argument('--ignore-certificate-errors')
            options.add_argument('--ignore-ssl-errors')
            driver = webdriver.Chrome(self._chromedriver,
                                      chrome_options=options)
            driver.implicitly_wait(self._wait_time)
        except Exception as e:
            self._logger.error('크롬 드라이버 로딩 실패', detail=e)
            self._onerror()
            return

        self._options = options
        self._driver = driver
        self._wait = WebDriverWait(driver, self._wait_time)
        self._logger.info('크롬 드라이버 로딩 완료')

        # Open the Cyworld page.
        self._logger.info('싸이월드 홈페이지 접속 중..')
        self._driver.get('https://cyworld.com')
        self._logger.success('싸이월드 홈페이지 접속 완료')
        return self

    def login(self, user_email, user_password):
        """Submit the login form and verify that the timeline page is reached.

        Returns ``self`` when the resulting URL contains ``timeline``;
        otherwise logs the reason, calls ``onerror`` and returns ``None``.
        """
        self._logger.info('로그인 시도 중..')
        prev_url = self._driver.current_url

        try:
            self._driver.find_element_by_name('email').send_keys(user_email)
            password_field = self._driver.find_element_by_name('passwd')
            # Hand the password over as a script argument. Splicing it into
            # the JavaScript source breaks on quotes or backslashes and would
            # let a crafted password run arbitrary script.
            self._driver.execute_script(
                'arguments[0].value = arguments[1]',
                password_field, user_password)
            password_field.send_keys(Keys.RETURN)
        except Exception as e:
            self._logger.error('알 수 없는 오류가 발생했습니다', detail=e)
            self._onerror()
            return None

        time.sleep(3)

        try:
            # Joined without separators: one element carrying all classes.
            selectors = [
                '.ui-dialog',
                '.ui-widget',
                '.ui-widget-content',
                '.ui-corner-all',
                '.ui-front',
                '.ui-draggable',
                '.ui-resizable'
            ]
            self._wait.until(EC_or(
                EC.url_changes(prev_url),
                EC.invisibility_of_element(
                    (By.CSS_SELECTOR, ''.join(selectors)))
            ))
        except Exception as e:
            self._logger.error('시간이 초과되었습니다', detail=e)
            self._onerror()
            return None

        url = self._driver.current_url
        if 'timeline' in url:
            self._logger.success('로그인 성공')
            return self
        elif 'pwd' in url.lower():
            # Clicking a.next would choose "change later", but because this
            # concerns the user's personal data it is left to the user.
            self._logger.error('싸이월드에 직접 로그인하여 비밀번호 변경 페이지를 확인한 후 다시 시도해주세요')
            self._onerror()
            return None
        else:
            self._logger.error('사용자 정보를 확인해주세요')
            self._onerror()
            return None

    def home(self):
        """Navigate to the user's home page and remember the user id.

        The user id is the last path segment of the ``a.freak1`` link.
        Returns ``self`` on success, ``None`` after calling ``onerror``.
        """
        self._logger.info('마이 홈으로 이동 중..')
        prev_url = self._driver.current_url

        # Extract the user's unique number.
        profile = self._driver.find_element_by_css_selector('a.freak1')
        self._user_id = profile.get_attribute('href').split('/').pop()

        # Click the profile picture area.
        self._driver.find_element_by_id('imggnbuser').click()

        try:
            self._wait.until(EC.url_changes(prev_url))
        except Exception as e:
            self._logger.error('시간이 초과되었습니다', detail=e)
            self._onerror()
            return None

        if 'home' not in self._driver.current_url:
            self._logger.error('마이 홈으로 이동할 수 없습니다')
            self._onerror()
            return None

        self._logger.success('이동 완료')
        return self

    def feeder(self, content_list, running):
        """Collect post URLs from the timeline and append them to ``content_list``.

        Keeps clicking the "more" button until it can no longer be clicked.
        ``running.value`` is set to 0 and the driver window is closed when the
        method finishes, including when it fails part-way.

        :param content_list: list-like object receiving the post layer URLs
        :param running: shared value; set to 0 when feeding stops
        """
        content_index = 0

        try:
            # Extract every timeline content area.
            while self._driver.find_element_by_css_selector('p.btn_list_more'):
                contents = self._driver.find_elements_by_css_selector(
                    'input[name="contentID[]"]'
                )[content_index:]

                for content in contents:
                    cid = content.get_attribute('value')
                    content_url = '{}/home/{}/post/{}/layer' \
                        .format(self._base_url, self._user_id, cid)
                    self._logger.info('Feeder::', content_url, callback=False)
                    if self._onlog:
                        self._onlog('{}개의 게시물 다운로드 중..'.format(content_index + 1))
                    content_list.append(content_url)
                    content_index += 1

                # Wait for the "more" button.
                try:
                    next_button = self._wait.until(
                        EC.element_to_be_clickable(
                            (By.CSS_SELECTOR, 'p.btn_list_more')))
                except Exception:
                    # The button never became clickable: treat this as the
                    # last page instead of using an unbound or stale button.
                    break
                time.sleep(self._delay)

                # Stop when the "more" button cannot be clicked (last page).
                if not (next_button.is_displayed() and next_button.is_enabled()):
                    break

                # Click the next button.
                next_button.click()
        finally:
            # Always signal the end of feeding, even on an error.
            running.value = 0
            self._driver.close()

        self._logger.info('Feeder:: 종료', callback=False)
        if self._onlog:
            self._onlog(
                '총 {}개의 게시물이 확인되었습니다.\n다운로드가 완료될 때 까지 잠시만 기다려주세요'.format(
                    content_index))

    def run(self, parser=2, downloader=2):
        """Run the feeder together with parser and downloader processes.

        Calls the ``done`` callback after all worker processes have been joined.

        :param parser: number of parser processes to start
        :param downloader: number of downloader processes to start
        """
        self._logger.info('이미지 다운로드 작업 시작')
        start = time.time()

        # Manager for multiprocessing.
        with Manager() as manager:
            # Process list.
            processes = []

            # Variables shared between the processes.
            content_list = manager.list()
            image_list = manager.list()
            count = manager.Value('i', 0)
            lock = manager.Lock()
            feeder_running = manager.Value('i', 1)
            parser_running = manager.Value('i', 1)

            parser_logger = Logger('cybot_parser.log')
            downloader_logger = Logger('cybot_downloader.log')

            # Only the name/value pair of each driver cookie is passed on.
            main_cookies = self._driver.get_cookies()
            cookie = [{'name': c['name'], 'value': c['value']}
                      for c in main_cookies]

            # Create and start the parser processes.
            for idx in range(parser):
                parser_instance = Parser(self._chromedriver, cookie,
                                         parser_logger, self._wait_time,
                                         self._delay, self._headless,
                                         self._options)
                parser_process = Process(target=parser_instance.parse,
                                         args=(content_list, image_list,
                                               feeder_running, parser_running),
                                         daemon=True)
                parser_process.name = 'Parser::' + str(idx)
                parser_process.start()
                processes.append(parser_process)
                self._logger.info('Parser', str(idx), '프로세스 시작')

            # Create and start the downloader processes.
            for idx in range(downloader):
                downloader_instance = Downloader(downloader_logger)
                downloader_process = Process(
                    target=downloader_instance.download,
                    args=(image_list, count, lock, parser_running),
                    daemon=True)
                downloader_process.name = 'Downloader::' + str(idx)
                downloader_process.start()
                processes.append(downloader_process)
                self._logger.info('Downloader', str(idx), '프로세스 시작')

            # The feeder runs in this process.
            self._logger.info('Feeder 시작')
            self.feeder(content_list, feeder_running)

            # Wait for parser and downloader processes that are still running.
            for p in processes:
                p.join()

            self._logger.info('작업 소요시간: {}초'
                              .format(round(time.time() - start, 2)),
                              callback=False)
            self._logger.info('전체 이미지 수: {}'.format(count.value),
                              callback=False)
            self._done()
class OrderData:
    """Import orders from a semicolon-separated export in ``./watch/``.

    An order consists of one row with a non-empty ``Totaalprijs`` column
    followed by any number of rows with an empty one; every row becomes an
    order line of that order.
    """

    # Number of rows in front of the header row.
    SKIP_ROWS = 4

    # Delivery time as H:MM or HH:MM on a 24-hour clock.
    DELIVERY_TIME_PATTERN = r'^([01]?[0-9]|2[0-3]):[0-5][0-9]$'

    def __init__(self, fileName):
        """Remember the file name and set up the database and logger."""
        self.fileName = fileName
        self.database = Database()
        self.logger = Logger()
        self.invalidOrders = 0

    def process(self):
        """Read the file, group its rows into orders and import them."""
        self.logger.info('Starting OrderData import')
        path = os.path.join(os.getcwd(), "watch", self.fileName)
        # newline='' lets the csv module handle line endings itself.
        with open(path, encoding='utf-8', newline='') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=';')
            for _ in range(self.SKIP_ROWS):
                next(csv_reader)
            headers = next(csv_reader)
            totalPriceColumn = headers.index('Totaalprijs')

            orders = []
            rowCount = self.SKIP_ROWS
            currentOrder = []
            for order in csv_reader:
                rowCount += 1
                if not order:
                    continue
                # A filled-in total price marks the first row of a new order.
                if order[totalPriceColumn] != '':
                    if currentOrder:
                        orders.append(currentOrder)
                    currentOrder = []
                # The row number travels with the row as its last element.
                currentOrder.append(order + [rowCount])
            # Flush the order that was still being collected at end of file;
            # without this the last order of every file would be dropped.
            if currentOrder:
                orders.append(currentOrder)

        start = time.time()
        self.logger.info("Processing {} orders".format(len(orders)))
        self.createOrderObj(headers, orders)
        self.logger.info('Import completed in: {}'.format(time.time() - start))
        self.logger.info('Invalid orders: {}'.format(self.invalidOrders))

    def createOrderObj(self, headers, orders):
        """Build an order object with its order lines for every grouped order.

        :param headers: header row; used to locate columns by name
        :param orders: list of orders, each a list of rows whose last element
            is the row number added by ``process``
        """
        def field(row, name):
            # Look up a column by its header name.
            return row[headers.index(name)]

        deliveryTypes = {'Bezorgen': True, 'Afhalen': False}

        for order in orders:
            # Order-level values are taken from the first row of the order.
            firstRow = order[0]

            restaurantName = field(firstRow, 'Winkelnaam')
            restaurantId = Restaurant.getStoreByName(self.database,
                                                     restaurantName)

            customerName = field(firstRow, 'Klantnaam')
            customerPhoneNr = field(firstRow, 'TelefoonNr')
            customerEmail = field(firstRow, 'Email')
            address = field(firstRow, 'Adres')
            city = field(firstRow, 'Woonplaats')
            addressId = Address.getOrCreateAddress(self.database, address, city)
            customerId = Customer.createOrUpdateCustomer(
                self.database, customerEmail, customerName, customerPhoneNr,
                addressId)

            orderDate = parseDate(field(firstRow, 'Besteldatum'))

            # True for 'Bezorgen', False for 'Afhalen', None for anything else.
            deliveryType = deliveryTypes.get(field(firstRow, 'AfleverType'))

            deliveryDate = parseDate(field(firstRow, 'AfleverDatum'))
            deliveryTime = field(firstRow, 'AfleverMoment')
            # Only a well-formed clock time is merged into the delivery date.
            if re.match(self.DELIVERY_TIME_PATTERN, deliveryTime):
                hour, minute = deliveryTime.split(':')
                deliveryDate = deliveryDate.replace(hour=int(hour),
                                                    minute=int(minute))

            # NOTE(review): totalPrice and paymentAmount are parsed but not
            # used below; the calls are kept so malformed values still
            # surface here. Confirm whether they should be stored.
            totalPrice = priceToFloat(field(firstRow, 'Totaalprijs'))

            couponName = field(firstRow, 'Gebruikte Coupon').strip()
            if couponName:
                couponId = Coupon.createCouponIfNotExists(self.database,
                                                          couponName)
            else:
                couponId = None
            couponDiscount = priceToFloat(field(firstRow, 'Coupon Korting'))
            paymentAmount = priceToFloat(field(firstRow, 'Te Betalen'))

            orderObj = Order.Order(restaurantId, customerId, deliveryType,
                                   couponDiscount, orderDate, deliveryDate,
                                   couponId, rowNumber=firstRow[-1])

            for orderRow in order:
                productName = field(orderRow, 'Product').strip()
                crustName = field(orderRow, 'PizzaBodem')
                crustId = Product.getPizzaCrustByName(self.database, crustName)
                sauceName = field(orderRow, 'PizzaSaus')
                sauceId = Product.getSauceByName(self.database, sauceName)

                # A row without crust and sauce is a non-pizza product.
                if crustName == '' and sauceName == '':
                    otherProductId = Product.getOtherProductIdByName(
                        self.database, productName)
                    pizzaId = None
                else:
                    pizzaId = Product.getPizzaIdByName(self.database,
                                                       productName)
                    otherProductId = None

                # NOTE(review): price, deliveryCosts and priceExtraIngredients
                # are parsed but not passed to the order line. Confirm.
                price = priceToFloat(field(orderRow, 'Prijs'))
                deliveryCosts = priceToFloat(field(orderRow, 'Bezorgkosten'))
                amount = int(field(orderRow, 'Aantal'))

                extraIngredientString = field(orderRow, 'Extra Ingrediënten')
                if extraIngredientString.strip():
                    extraIngredients = [
                        Product.getIngredientByName(self.database,
                                                    ingredient.strip())
                        for ingredient in extraIngredientString.split(',')
                    ]
                else:
                    extraIngredients = []
                priceExtraIngredients = priceToFloat(
                    field(orderRow, 'Prijs Extra Ingrediënten'))
                orderRowPrice = priceToFloat(field(orderRow, 'Regelprijs'))

                orderLine = Order.OrderLine(orderRowPrice, amount,
                                            extraIngredients, sauceId, crustId,
                                            pizzaId, otherProductId,
                                            rowNumber=orderRow[-1])
                orderObj.addOrderLine(orderLine)

            self.createOrder(orderObj)

    def createOrder(self, orderObj):
        """Store a valid order with its lines, or log why it is invalid.

        An invalid order is counted once in ``self.invalidOrders``.
        """
        orderValid, orderErrors = orderObj.isValid()
        if orderValid:
            orderId = Order.createOrder(self.database, orderObj)
            for orderLine in orderObj.orderLines:
                Order.createOrderLine(self.database, orderLine, orderId)
        else:
            self.logger.error('Order not valid row: {}, error: {}'.format(
                orderObj.rowNumber, ', '.join(orderErrors)))
            for row in orderObj.orderLines:
                rowValid, rowErrors = row.isValid()
                if not rowValid:
                    self.logger.error(
                        'Order line not valid row: {}, error: {}'.format(
                            row.rowNumber, ', '.join(rowErrors)))
            self.invalidOrders += 1
if __name__ == '__main__':
    # Command-line entry point: validate the anime url and episode selection,
    # then download every requested episode.
    parser = argparse.ArgumentParser()
    for short_flag, long_flag, help_text in (
        ('-a', '--anime', 'add anime url'),
        ('-e', '--episode',
         'enter episode range (1-10) or just 1 to download single episode'),
        ('-d', '--download',
         'download path, by defult download to current directory'),
    ):
        parser.add_argument(short_flag, long_flag, help=help_text, type=str)
    args = parser.parse_args()

    # The terminal can add backslash escapes to the url automatically;
    # strip them when a url was given.
    anime = args.anime
    if anime:
        anime = anime.replace('\\', '')

    # Reject an anime url the helper does not accept.
    if not helper.is_valid_anime(anime):
        print('Invalid anime url')
        print('I.e. http://animeheaven.eu/i.php?a=Bakuman.')
        exit(1)

    # Reject an episode selection the helper cannot resolve.
    episodes = helper.get_episodes(args.episode)
    if not episodes:
        print('Invalid episode(s)')
        exit(1)

    # A path given on the command line overrides the module default.
    DOWNLOAD_PATH = args.download or DOWNLOAD_PATH
    LOGGER.info(f'Download path: {DOWNLOAD_PATH}')

    app = App(anime, DOWNLOAD_PATH)
    for ep in episodes:
        app.download(ep)
class EMRecognition:
    """Resolve entity morphs found in a tweet to candidate entity names."""

    def __init__(self, *args, **kwargs):
        """Starting point for the whole project.

        Keyword arguments read here:

        :param ner_mode: 0 - Rule Based NER, 1 - Stanford NER,
            2 - Given NER List (default 0)
        :param enabled_module: bit mask; bit ``i`` enables
            ``recognition_classes[i]`` (default 31, i.e. all five)
        """
        # Classes of every recognition method, in bit-mask order.
        self.recognition_classes = [PhoneticSubstitution, NicknameGeneration,
                                    SpellingDecomposition, Translation,
                                    Characteristic]
        # Enabled modules as
        # {<class_name>: {'instance': <object>, 'confidence': <float>}, ...}
        self.recognition_modules = {}
        # Logger shared with every module instance.
        self.logger = Logger()
        self.ner_mode = kwargs.get("ner_mode", 0)
        self.enabled_module = kwargs.get('enabled_module', 31)

        # Load the list of known names, one per line.
        with open(os.path.join(config.DICT_ROOT, "name_list.txt"),
                  encoding='utf-8') as f:
            known_name_list = [line.strip() for line in f]

        module_args = []
        module_kwargs = {
            'logger': self.logger,
            'name_list': known_name_list,
        }
        for i, _class in enumerate(self.recognition_classes):
            if not self.enabled_module & (1 << i):
                continue
            class_name = _class.__name__
            self.logger.info("[Core] Initializing module " + class_name)
            self.recognition_modules[class_name] = {
                'instance': _class(*module_args, **module_kwargs),
                'confidence': 1.0
            }
        # NOTE(review): the list and dict are passed as two positional
        # arguments here, whereas the modules above receive them unpacked.
        # Confirm against the ChineseNER signature.
        self.ner_module = ChineseNER(module_args, module_kwargs)

    def recognize_tweet(self, tweet, morphs=None):
        """Extract name entity morphs from a tweet and list candidate names.

        :param tweet: (String) content of the tweet(weibo)
        :param morphs: morphs to use when ``ner_mode`` is 2
        :return: (Dict) ``{<morph>: [<name>, ...]}`` with at most five names
            per morph, as returned by ``combine_results``. Empty when no
            morphs are available.
        """
        if self.ner_mode == 0:
            extracted = self.ner_module.extract_morph(tweet)
            # Drop morphs consisting only of digits, spaces and , . :
            extracted = [m for m in extracted
                         if re.match(r"^[0-9 ,.:]+$", m) is None]
        elif self.ner_mode == 1:
            extracted = self.ner_module.extract_name_entities_from_sentence(tweet)
        elif self.ner_mode == 2:
            extracted = morphs
        else:
            self.logger.warning("[Core] Unknown ner_mode %s." % self.ner_mode)
            extracted = None
        # No morphs (None) is treated as an empty list instead of crashing.
        extracted_morphs = list(extracted) if extracted is not None else []
        self.logger.info("Morphs: " + " ".join(extracted_morphs))

        candidate_lists = {}
        if not extracted_morphs:
            return candidate_lists

        # Train once per tweet rather than once per morph.
        model = self.train_nbmodel()

        # Ask every module for possible names of each morph.
        for morph in extracted_morphs:
            self.logger.info("Dealing with morph: %s " % morph)
            module_results = {}
            for module_name, module_entry in self.recognition_modules.items():
                module = module_entry['instance']
                try:
                    module_results[module_name] = module.get_similar_names(morph)
                except Exception as e:
                    # A failing module contributes no candidates.
                    self.logger.warning("[Core] %s raised an exception. %s."
                                        % (module_name, str(e)))
                    module_results[module_name] = {}
            morph_result = self.combine_results(morph, module_results,
                                                model=model)
            candidate_lists[morph] = list(morph_result.keys())

        return candidate_lists

    def combine_results(self, morph, morph_result, model=None):
        """Score the candidates of all modules and keep the best five entries.

        Each candidate score is the module's confidence for that candidate
        multiplied by the module prior and, when the morph occurs in the
        training data, by the module's probability for that morph.

        :param morph: (string) morph being resolved
        :param morph_result: (dict) ``{<module_name>: {<name>: <score>}}``
        :param model: optional ``(prob, prior)`` pair as returned by
            ``train_nbmodel``; trained on demand when omitted
        :return: (Dict) ``{<name>: <score>}`` built from the five highest
            scored entries; a name that occurs more than once keeps its
            highest score.
        """
        prob, prior = model if model is not None else self.train_nbmodel()

        scored = []
        for module, candidates in morph_result.items():
            # A module that never occurs in the training data has prior 0.
            probability = prior.get(module, 0.0)
            if morph in prob:
                probability *= prob[morph][module]
            for candidate, confidence in candidates.items():
                scored.append((probability * confidence, candidate))
        scored.sort(key=lambda pair: pair[0], reverse=True)
        self.logger.info("[Core] Final Result : " + str(scored))

        best = {}
        for score, name in scored[:5]:
            # Entries are in descending order, so the first score is the best.
            best.setdefault(name, score)
        return best

    def train_nbmodel(self):
        """Estimate module priors and per-morph probabilities from the dictionary.

        Reads ``method_classification.txt``, where each line holds a morph and
        a comma-separated list of class names separated by a space. Only
        classes of enabled modules are counted.

        :return: ``(pro, pri)`` where ``pri[<class>]`` is the share of counted
            entries belonging to the class and ``pro[<morph>][<class>]`` is
            ``(count + 1) / (class_count + number_of_morphs)``.
        """
        pri = {}
        pro = {}
        total_size = 0
        using_classes = self.recognition_modules.keys()
        with open(os.path.join(config.DICT_ROOT, "method_classification.txt"),
                  encoding='utf-8') as f:
            for line in f:
                data = line.strip().split(" ")
                if len(data) < 2:
                    # Blank or malformed line: nothing to count.
                    continue
                morph = data[0]
                for c in data[1].split(","):
                    if c not in using_classes:
                        continue
                    total_size += 1
                    pri[c] = pri.get(c, 0) + 1
                    counts = pro.setdefault(morph, {})
                    counts[c] = counts.get(c, 0) + 1

        # The smoothing uses the raw class counts, so it runs before the
        # priors are normalised below.
        for counts in pro.values():
            for class_name in using_classes:
                counts[class_name] = (counts.get(class_name, 0) + 1) / (
                    pri.get(class_name, 0) + len(pro))

        for p in pri:
            pri[p] = pri[p] / total_size
        return pro, pri