class Post(Base):
    """
    Represents a reddit post with additional data

    :attr id: Integer, generated sql pk
    :attr reddit_fullname: str, reddit type identifier 't3_' + submission id ex. 'a4hafgh'
    :attr mpn: str, manufacturer part number for linked product
    :attr price: int, rounded price of product at date of instantiation
    :attr date: Date, date of instantiation
    :attr site: str, domain of linked product ex 'microcenter.com'
    """
    __tablename__ = 'posts'

    id = Column(Integer, primary_key=True)
    reddit_fullname = Column(String(15), nullable=False, unique=True)
    mpn = Column(String(30))
    price = Column(Integer)
    date = Column(Date)
    site = Column(String(50))

    post_logger = logger.get_logger('Post', './logfile.log')

    def __init__(
            self,
            reddit_fullname: str,
            mpn: str,
            price: int,
            date: datetime.date,
            site: str,
    ):
        self.reddit_fullname = reddit_fullname
        self.mpn = mpn
        self.price = price
        self.date = date
        self.site = site

    @validates('mpn', 'site')
    def validate_lengths(self, key, value):
        """
        For attributes in decorator, check against max value len, and truncate if needed

        :param key: str, each str passed in from decorator
        :param value: str, passed in by sqlalchemy
        :return: str, the shorter of value and value[:max_len], further handled by sqlalchemy
        """
        max_len = getattr(self.__class__, key).prop.columns[0].type.length
        if value and len(value) > max_len:
            self.post_logger.warning(f'{key}: {value} - '
                                     f'violated max length and was truncated')
            return value[:max_len]
        return value

    def __repr__(self):
        return f'<Post ({self.date} - {self.reddit_fullname}, {self.mpn})>'
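# A minimal usage sketch for the model above (assumes the declarative Base that
# Post is mapped to plus a throwaway SQLite engine; the names below are
# illustrative, not from the source). It shows the @validates hook firing on
# attribute assignment, so an over-long mpn is truncated before the INSERT.
import datetime
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine('sqlite:///:memory:')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

post = Post(
    reddit_fullname='t3_a4hafgh',
    mpn='X' * 40,                      # longer than String(30); validator truncates to 30 chars
    price=300,
    date=datetime.date.today(),
    site='microcenter.com',
)
session.add(post)
session.commit()
assert len(post.mpn) == 30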
def main(args):
    config = ConfigParser(args)
    cfg = config.config
    logger = get_logger(config.log_dir, "train")
    validation_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    validation_dataset = CTImageLoaderTest(
        link_label_file=cfg["validation_data"],
        image_size=cfg["input_size"],
        root_folder=cfg["root_folder"],
        transforms=validation_transform)
    vali_loader = torch.utils.data.DataLoader(
        validation_dataset, batch_size=cfg["vali_batch_size"], shuffle=False,
        num_workers=cfg["workers"], drop_last=False)
    model = resnet50(number_class=3, pretrained=True)
    checkpoint = torch.load(cfg['resume'])
    state_dict = checkpoint['state_dict']
    model.load_state_dict(state_dict)
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()
    trans = transforms.ToPILImage()
    with torch.no_grad():
        for i, (data, target, links) in enumerate(vali_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, pred = torch.max(output, dim=1)
            for j in range(len(links)):
                print(links[j])
                print(pred[j].item())
                print(target[j].item())
                # if pred[j].item() == target[j].item():
                #     continue
                image = data[j] * 0.5 + 0.5
                image = trans(image.cpu())
                # image = image.cpu().data.numpy()
                # print(image)
                image = np.array(image)
def generate(name_type, language, amount=1):
    connection = connect.connect()
    cursor = connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
    generator_module = import_module('tools.name_generator.' + language)
    generator = getattr(generator_module, name_type)(cursor)
    names = generator.generate(amount)
    sb_logger = logger.get_logger('name_generator')
    sb_logger.debug("Name generator called with name_type: {} language: {} amount: {}".format(
        name_type, language, amount))
    if len(names) == 0:
        sb_logger.warning("No names generated.")
        return False
    else:
        sb_logger.debug("{} names generated.".format(len(names)))
        return names
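# Hypothetical call of generate() above; 'english' and 'TavernName' are
# illustrative placeholders - the real values depend on the modules and classes
# under tools/name_generator/, which are not shown in this excerpt.
names = generate(name_type='TavernName', language='english', amount=5)
if names:
    for name in names:
        print(name)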
def write_log(level, message, request=None):
    frame = inspect.stack()[1]
    module = inspect.getmodule(frame[0])
    extras = {}
    extras['caller_module'] = module.__name__
    extras['caller_function'] = frame[3]
    if request is not None:
        if request.META.get('REMOTE_ADDR'):
            extras['ip'] = request.META.get('REMOTE_ADDR')
        if request.user.id:
            extras['user_id'] = request.user.id
        if request.META.get('HTTP_USER_AGENT'):
            extras['user_agent'] = request.META.get('HTTP_USER_AGENT')
    sb_logger = logger.get_logger(logger_name='frontend')
    severity_call = getattr(sb_logger, level)
    severity_call(message, extra=extras)
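# Hypothetical call from a Django view: the level string must be the name of a
# logger method ('debug', 'info', 'warning', 'error', ...), since it is resolved
# with getattr() above; the view name and message are illustrative.
def profile_view(request):
    write_log('info', 'profile page rendered', request=request)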
import requests
from lxml import html

from stores.registration import register
from logger import logger

frys_logger = logger.get_logger('Frys', './logfile.log')


def get_page(url: str):
    """Simple request based on url"""
    headers = {
        'DNT': '1',
        'Host': 'www.frys.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    return requests.get(url, headers=headers)


def get_price(tree: html.HtmlElement):
    """
    Parses price from page

    :param tree: html.HtmlElement from lxml
    :return: int, rounded, if exists; else None
    """
    path = '//span[@id="did_price1valuediv"]'
    try:
        price_tag = tree.xpath(path)[0].text
        price = int(round(float(price_tag[1:])))
    except IndexError as e:
        # Price element not found on the page
        frys_logger.error(f'{e.__class__}: {e}')
        return None
    return price
import re

from lxml import html
import requests

from stores.registration import register
from logger import logger

amazon_logger = logger.get_logger('Amazon', './logfile.log')


def get_page(url: str):
    """Simple request based on url"""
    headers = {
        'DNT': '1',
        'Host': 'www.amazon.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    return requests.get(url, headers=headers)


def get_xpath(path: str, tree: html.HtmlElement):
    """
    Looks for path/element in tree

    :param path: str, valid xpath search string
    :param tree: html.HtmlElement from lxml
    :return: element, based on path; or None if not found
    """
    try:
        return tree.xpath(path)[0]
    except IndexError:
        return None
import worker
from shutil import rmtree
from time import sleep
from algthm.utils.file import dir_empty
from cfg.loader import cfg
from multiprocessing import Process
from logger import logger
from dex.core.db import MongoConnection
from dex.core.exceptions.indexer import IndexerBootFailure
from logging import CRITICAL, getLogger
from datetime import datetime
from elasticsearch import Elasticsearch, ElasticsearchException

logger.setup_logging()
logger = logger.get_logger('dex')
pika_logger = getLogger('pika')
pika_logger.setLevel(CRITICAL)


def initialize_workers(num_workers, target, daemon=True):
    """
    Initializes the worker processes.
    """
    workers = []
    process = None
    print '> initializing {} workers ..'.format(num_workers),
    for i in range(num_workers):
        try:
import os

from utils.path_utils import get_app_data_path
from logger.logger import get_logger
from trainer import custom_accuracy
from trainer.decision_tree_classifier import DTClassifier
from trainer.random_forest_classifier import RFClassifier
from trainer.adaboost_classifier import AdaboostClassifier
from trainer.xgboost_classifier import XGBClassifier
from trainer.logistic_regression_classifier import LRClassifier
from trainer.knn_classifier import KNNClassifier
from trainer.ann_classifier import ANNClassifier
from non_ml.non_ml_classifier import NonMLClassifier
from utils.load_and_process import DataLoader
from sklearn.metrics import precision_score, recall_score, f1_score, balanced_accuracy_score, accuracy_score

logger = get_logger()


class TrainingDetails(object):
    def __init__(self, ds, ds_name, seed):
        self.ds = ds
        self.ds_name = ds_name
        self.seed = seed


def run_experiment(ds, experiment, timing_key, verbose, timings):
    """
    :param ds:
    :param experiment:
    :param timing_key:
import sys

from logger import logger

log = logger.get_logger()


class GenericException(Exception):
    def __init__(self, message):
        self.message = message
        log.error(str(message) + "\n")


class ConfigError(GenericException):
    pass


class ParseError(GenericException):
    pass
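# Hypothetical usage of the exception hierarchy above: constructing either
# subclass logs the message through the shared module-level logger before the
# exception propagates. The file name in the message is illustrative.
try:
    raise ConfigError('missing [database] section in settings.ini')
except ConfigError as err:
    print(err.message)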
#!/usr/bin/python
# -*- coding: utf-8 -*-
# dht.py
"""
[#11] Add and implement the measurement of temperature and humidity
author: Thomas Kaulke, [email protected]
"""
from __future__ import absolute_import
import Adafruit_DHT
import conf.greenhouse_config as conf
import logger.logger as log

logging = log.get_logger()
lib = conf.lib
sensor = Adafruit_DHT.DHT22
pin = conf.DHT_PIN


def get_values():
    global temperature
    global humidity
    logging.info('Get temperature and humidity values.')
    humidity, temperature = Adafruit_DHT.read_retry(sensor, pin)
    if humidity is not None and temperature is not None:
        logging.info(('{0}{1}{2}'.format(conf.temp_format, lib.space, conf.hum_format)).format(
            temperature, humidity))
    else:
        logging.warning(
import time

import praw

from logger import logger

logger = logger.get_logger('RedditHandler', './logfile.log')


class RedditHandler:
    @staticmethod
    def get_subreddit(sub_to_init: str):
        reddit = praw.Reddit()
        subreddit = reddit.subreddit(sub_to_init)
        return subreddit

    @staticmethod
    def reply_to_submission(submission: praw.Reddit.submission, markdown: str):
        """
        Attempts to post comment to reddit submission;
        sleeps and retries if ratelimit enforced by reddit

        :param submission: praw.Reddit.submission, submission to reply to
        :param markdown: str, formatted markdown for reddit
        :return: nothing
        """
        if markdown is not None:
            logger.info('attempting reply...')
            try:
                submission.reply(markdown)
            except praw.exceptions.APIException as e:
                logger.error(e.message)
import re

import requests

from logger import logger
from stores.registration import register

newegg_logger = logger.get_logger('Newegg', './logfile.log')


def convert_mobile_url(url: str):
    """
    Check for m.newegg.com...

    :param url: str, mobile newegg url
    :return: str, non-mobile link
    """
    if 'm.newegg.com' in url:
        base_url = 'https://www.newegg.com/Product/Product.aspx?Item='
        pattern = '(?s)(?<=products/)[A-Za-z0-9]*'
        item = re.search(pattern, url)
        try:
            item = item.group(0).strip()
        except AttributeError as e:
            newegg_logger.error(f'{e.__class__}: {e}')
            return None
        else:
            url = base_url + item
    return url


def get_page(url: str):
    """Simple request based on url (same pattern as the other store modules)"""
    headers = {
        'DNT': '1',
        'Host': 'www.newegg.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    return requests.get(url, headers=headers)
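# Illustrative conversion using convert_mobile_url() above; the item number is
# made up, but any alphanumeric id after 'products/' is handled the same way.
# 'https://m.newegg.com/products/N82E16819113496'
#   -> 'https://www.newegg.com/Product/Product.aspx?Item=N82E16819113496'
print(convert_mobile_url('https://m.newegg.com/products/N82E16819113496'))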
import re

import requests
from lxml import html

from stores.registration import register
from logger import logger

bestbuy_logger = logger.get_logger('BestBuy', './logfile.log')


def get_page(url: str):
    """Simple request based on url"""
    headers = {
        'DNT': '1',
        'Host': 'www.bestbuy.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    return requests.get(url, headers=headers)


def get_price(text: str):
    """
    Parses for price

    :param text: str, from requests.get().text
    :return: int, rounded, if exists; else None
    """
    pattern = '(?<=customerPrice":)(.*?)(?=,)'
    data = re.search(pattern, text)
    try:
        data = data.group(0).strip()
    except AttributeError as e:
        # No price found in page text
        bestbuy_logger.error(f'{e.__class__}: {e}')
        return None
    return int(round(float(data)))
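# Illustrative only: a fabricated fragment of bestbuy page text that the
# customerPrice regex above would match.
sample = '"skuId":"6247089","customerPrice":1299.99,"dollarSavings":200.0'
assert re.search('(?<=customerPrice":)(.*?)(?=,)', sample).group(0) == '1299.99'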
class Flusher(object):
    """
    The flusher is responsible for translating Collectd metrics to CloudWatch
    MetricDataStatistic, batching, aggregating and flushing metrics to CloudWatch
    endpoints.

    Keyword arguments:
    config_helper -- The ConfigHelper object with configuration loaded
    """

    _LOGGER = get_logger(__name__)
    _FLUSH_INTERVAL_IN_SECONDS = 60
    _FLUSH_DELTA_IN_SECONDS = 1
    _MAX_METRICS_PER_PUT_REQUEST = 20
    _MAX_METRICS_TO_AGGREGATE = 2000

    def __init__(self, config_helper):
        self.lock = threading.Lock()
        self.client = None
        self.config = config_helper
        self.metric_map = {}
        self.last_flush_time = time.time()
        self.nan_key_set = set()
        self.enable_high_resolution_metrics = config_helper.enable_high_resolution_metrics
        self.flush_interval_in_seconds = int(config_helper.flush_interval_in_seconds
                                             if config_helper.flush_interval_in_seconds
                                             else self._FLUSH_INTERVAL_IN_SECONDS)
        self.max_metrics_to_aggregate = self._MAX_METRICS_PER_PUT_REQUEST \
            if self.enable_high_resolution_metrics else self._MAX_METRICS_TO_AGGREGATE
        self.client = PutClient(self.config)

    def is_numerical_value(self, value):
        """
        Assumes that the value passed from collectd to this plugin is a float or an
        integer. If collectd ever passes a string to this interface, the method
        _add_values_to_metric should be modified to convert the string to a float.

        Returns:
            True if the value is a float and is not nan
            False if the value is nan
        """
        try:
            return not math.isnan(float(value))
        except ValueError:
            return False

    def add_metric(self, value_list):
        """
        Translates Collectd metrics to CloudWatch format and stores them in flusher
        for further processing such as batching and aggregating.

        Keyword arguments:
        value_list -- The ValueList object passed by Collectd to the write callback
        """
        with self.lock:
            # The flush operation should take place before adding metric for a new minute.
            # Together with flush delta this ensures that old metrics are flushed
            # before or at the start of a new minute.
            self._flush_if_need(time.time())
            if self.config.whitelist.is_whitelisted(self._get_metric_key(value_list)):
                self._aggregate_metric(value_list)

    def _flush_if_need(self, current_time):
        """ Checks if metrics should be flushed and starts the flush procedure """
        if self._is_flush_time(current_time):
            if self.config.debug and self.metric_map:
                state = ""
                for dimension_metrics in self.metric_map:
                    state += str(dimension_metrics) + "[" + \
                        str(self.metric_map[dimension_metrics][0].statistics.sample_count) + "] "
                self._LOGGER.info("[debug] flushing metrics " + state)
            self._flush()

    def _is_flush_time(self, current_time):
        if self.enable_high_resolution_metrics:
            return (current_time - self.last_flush_time) >= \
                self.flush_interval_in_seconds + self._FLUSH_DELTA_IN_SECONDS
        return (current_time - self.last_flush_time) + self._FLUSH_DELTA_IN_SECONDS >= \
            self.flush_interval_in_seconds

    def record_nan_value(self, key, value_list):
        if key not in self.nan_key_set:
            self._LOGGER.warning(
                "Adding Metric value is not numerical, key: " + key +
                " value: " + str(value_list.values))
            self.nan_key_set.add(key)

    def _aggregate_metric(self, value_list):
        """
        Selects existing metric or adds a new metric to the metric_map. Then aggregates
        values from ValueList with the selected metric. If the size of metric_map is
        above the limit, new metric will not be added and the value_list will be dropped.
        """
        nan_value_count = 0
        dimension_key = self._get_metric_key(value_list)
        adjusted_time = int(value_list.time)
        key = dimension_key
        if self.enable_high_resolution_metrics:
            key = dimension_key + "-" + str(adjusted_time)
        if key in self.metric_map:
            nan_value_count = self._add_values_to_metrics(self.metric_map[key], value_list)
        else:
            if len(self.metric_map) < self.max_metrics_to_aggregate:
                nan_value_count = self._add_metric_to_queue(value_list, adjusted_time, key)
            else:
                if self.enable_high_resolution_metrics:
                    if self.config.debug and self.metric_map:
                        state = ""
                        for dimension_metrics in self.metric_map:
                            state += str(dimension_metrics) + "[" + \
                                str(self.metric_map[dimension_metrics][0].statistics.sample_count) + "] "
                        self._LOGGER.info("[debug] flushing metrics " + state)
                    self._flush()
                    nan_value_count = self._add_metric_to_queue(value_list, adjusted_time, key)
                else:
                    self._LOGGER.warning("Batching queue overflow detected. Dropping metric.")
        if nan_value_count:
            self.record_nan_value(dimension_key, value_list)

    def _add_metric_to_queue(self, value_list, adjusted_time, key):
        nan_value_count = 0
        metrics = MetricDataBuilder(self.config, value_list, adjusted_time).build()
        nan_value_count = self._add_values_to_metrics(metrics, value_list)
        if nan_value_count != len(value_list.values):
            self.metric_map[key] = metrics
        return nan_value_count

    def _get_metric_key(self, value_list):
        """
        Generates key for the metric. The key must use both metric_name and plugin
        instance to ensure uniqueness.
        """
        return value_list.plugin + "-" + value_list.plugin_instance + "-" + \
            value_list.type + "-" + value_list.type_instance

    def _add_values_to_metrics(self, dimension_metrics, value_list):
        """
        Aggregates values from value_list with existing metric.
        Adds each valid value to the metric and skips nan values.

        Returns:
            the count of nan values in value_list
        """
        for metric in dimension_metrics:
            nan_value_count = 0
            for value in value_list.values:
                if self.is_numerical_value(value):
                    metric.add_value(value)
                else:
                    nan_value_count += 1
        return nan_value_count

    def _flush(self):
        """ Batches and puts metrics to CloudWatch """
        self.last_flush_time = time.time()
        metric_map_size = len(self.metric_map)
        if self.metric_map:
            prepare_batch = self._prepare_batch()
            try:
                while True:
                    metric_batch = prepare_batch.next()
                    if not metric_batch:
                        break
                    self.client.put_metric_data(MetricDataStatistic.NAMESPACE, metric_batch)
                    if len(metric_batch) < self._MAX_METRICS_PER_PUT_REQUEST:
                        break
            except StopIteration, e:
                if metric_map_size % self._MAX_METRICS_PER_PUT_REQUEST != 0 or len(self.metric_map) != 0:
                    self._LOGGER.error("_flush error: " + str(e) +
                                       " Original map size: " + str(metric_map_size))
def main():
    logger = get_logger(__name__)
    logger.info("hello project one!")
    lib_one_main()
"""
Entry Point for the Search API.
"""
import tornado.web

from search.controllers import DefaultHandler
from search.controllers import query_handler
from search.controllers import metric_handler
from search.controllers import autosuggest
from logger import logger
from cfg.loader import cfg

# ------------------------------------------------------------------------------
#   Configure Logging
# ------------------------------------------------------------------------------
logger.setup_logging()
logger = logger.get_logger('search_api')

# ------------------------------------------------------------------------------
#   Configure Application
# ------------------------------------------------------------------------------
application = tornado.web.Application(
    [
        (r"/", DefaultHandler),
        (r"/query", query_handler.QueryHandler),
        (r"/metrics/([A-z0-9]+)", metric_handler.MetricHandler),
        (r"/_auto", autosuggest.AutoSuggest),
    ],
    debug=True)


def main():
def __init__(self, sub_to_stream: str):
    self.logger = logger.get_logger('Bot', './logfile.log')
    self.logger.info(f'initializing on {sub_to_stream}...')
    self.subreddit = RedditHandler.get_subreddit(sub_to_stream)
    self.logger.info('initialized')
from flask import Flask, Response, request
import json

from logger.logger import get_logger
from config.config import LOGGER_CONFIG
from handlers.http import HTTPHandler
from handlers.predict import ModelPredict
from handlers import model

app = Flask(__name__)
logger = get_logger(LOGGER_CONFIG)
model = model.load(logger)


@app.route('/<SERVICE-PREFIX>/health', methods=['GET'])
def health():
    res = {"message": "I am alive"}
    return Response(json.dumps(res), 200, mimetype='application/json')


@app.route('/<SERVICE-PREFIX>/v1/predict', methods=['POST'])
def predict():
    req = HTTPHandler(request.get_json(), request.headers.get('X-Request-ID'))
    req_id, roi, err, res_stat = req.validate(logger)
    if err is not None:
        return Response(err, mimetype='application/json', status=res_stat.code)
    pred = ModelPredict(model, roi)
    res, err, res_stat = pred.run(req_id, logger)
    if err is not None:
        return Response(err, mimetype='application/json', status=res_stat.code)
def main():
    logger = get_logger(__name__)
    logger.info("hello lib one")
def logger():
    return get_logger()
import json
import re

import requests
from lxml import html

from stores.registration import register
from templates import mc_template
from logger import logger

mc_logger = logger.get_logger('Microcenter', './logfile.log')


def strip_url(url: str):
    """
    Given a Microcenter URL, if query string is present,
    strip it and return stripped string.
    Titles of products should not contain '?'

    :param url: str, microcenter url
    :return: str, stripped of query string as necessary
    """
    if 'storeID=' in url:
        begin_id = url.find('storeID=')
        end_id = begin_id + 11  # len('storeID=095')
        url = url[:begin_id] + url[end_id:]
    mc_logger.info(f'url: {url}')
    return url


def get_page(url: str, store_num: str = '095'):
    """
    Given a Microcenter URL, return request object
def main(args):
    config = ConfigParser(args)
    cfg = config.config
    logger = get_logger(config.log_dir, "train")
    train_dataset = MRIBrainSegmentation(root_folder=cfg['root_folder'],
                                         image_label=cfg['train_data'],
                                         is_train=True,
                                         ignore_label=0,
                                         input_size=cfg['input_size'])
    vali_dataset = MRIBrainSegmentation(root_folder=cfg['root_folder'],
                                        image_label=cfg['validation_data'],
                                        is_train=False,
                                        ignore_label=0,
                                        input_size=cfg['input_size'])
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=cfg["train_batch_size"], shuffle=True,
        num_workers=cfg["workers"], drop_last=True)
    vali_loader = torch.utils.data.DataLoader(
        vali_dataset, batch_size=cfg["vali_batch_size"], shuffle=False,
        num_workers=cfg["workers"], drop_last=False)
    if cfg['net_name'] == "deeplab":
        model = DeepLab(num_classes=1,
                        backbone=cfg['backbone'],
                        output_stride=cfg['output_stride'],
                        sync_bn=cfg['sync_bn'],
                        freeze_bn=cfg['freeze_bn'])
    else:
        model = Unet(in_channels=3, out_channels=1, init_features=32)
    criterion = getattr(loss, 'dice_loss')
    optimizer = optim.SGD(model.parameters(), lr=cfg["lr"], momentum=0.9,
                          weight_decay=cfg["weight_decay"])
    metrics_name = []
    scheduler = Poly_Scheduler(base_lr=cfg['lr'], num_epochs=config['epoch'],
                               iters_each_epoch=len(train_loader))
    trainer = Trainer(model=model,
                      criterion=criterion,
                      optimizer=optimizer,
                      train_loader=train_loader,
                      nb_epochs=config['epoch'],
                      valid_loader=vali_loader,
                      lr_scheduler=scheduler,
                      logger=logger,
                      log_dir=config.save_dir,
                      metrics_name=metrics_name,
                      resume=config['resume'],
                      save_dir=config.save_dir,
                      device="cuda:0",
                      monitor="max iou_class_1",
                      early_stop=-1)
    trainer.train()
import requests

from stores.registration import register
from logger import logger

# TODO change 'store_name' to name of store being parsed
store_name_logger = logger.get_logger('Store_name', './logfile.log')

"""
The below functions are just suggestions, they do not have to be
implemented in exactly this manner, or at all
"""


def get_html(url: str):
    # do something
    pass


def extract_from_html(pattern: str, html: str):
    # do something
    pass


def get_mpn(html: str):
    # do something
    pass


def get_price(html: str):
    # do something
    pass
import re

import requests

from stores.registration import register
from logger import logger

rakuten_logger = logger.get_logger('Rakuten', './logfile.log')


def get_page(url: str):
    """Simple request based on url"""
    headers = {
        'DNT': '1',
        'Host': 'www.rakuten.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    return requests.get(url, headers=headers)


def get_price(text: str):
    """
    Parses for price

    :param text: str, from requests.get().text
    :return: int, rounded, if exists; else None
    """
    pattern = '(?<="price" content=")(.*?)(?="/>)'
    data = re.search(pattern, text)
    try:
        data = data.group(0).strip()
    except AttributeError as e:
        # Combo deals/splash pages/etc
        rakuten_logger.error(f'{e.__class__}: {e}')
        return None
    return int(round(float(data)))
import pandas as pd

# Import user libraries
from configuration import Configuration
from db.sql_metadata_service import SQLMetadataService
from logger.logger import get_logger
from etl.ETL import ETL

# Get the configuration object to access config variables
configuration = Configuration()

# Create logger object with given configuration
logger = get_logger(
    logging_level=configuration.get_logging_level(),
    logs_output_file_path=configuration.get_logs_output_file_path(),
    logs_rotate_when=configuration.get_logs_rotate_when(),
    logs_rotate_backup_count=configuration.get_logs_rotate_backup_count()
)


def main(argv):
    """
    Main function and the entry point of the lending club loan ETL application.

    Parameters
    ----------
    argv (Type: str list): Command line arguments

    Returns
    ----------
    None
    """
import argparse
import os
import random
import time

from logger import logger

log = logger.get_logger(__name__)

# python -m tools.youget.download
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("url", help="video url")
    parser.add_argument("-o", "--output", help="output folder for downloading", default=".")
    parser.add_argument("-v", "--verbose", help="verbose output", action="store_true")  # flag, if specified True
    args = parser.parse_args()

    output = os.path.abspath(args.output)
    log.info(f"start downloading video on {args.url} to {output}")
    os.chdir(output)  # change working dir
    for n in range(144, 149):
        cmd = 'you-get --format=dash-flv480 https://www.bilibili.com/video/BV1w7411v74u?p=' + str(n)
        log.info("executing: " + cmd)
        time.sleep(random.randint(0, 3))  # mock human behavior
        res = os.system(cmd)
#!/usr/bin/env python3
import time

from crawler.config import CrawlerConfig
from crawler.crawler import Crawler
from logger.logger import get_logger

LOGGER = get_logger()
CONFIG = CrawlerConfig(LOGGER, max_depth=1)


def worker(unit: dict):
    crawler: Crawler = unit['crawler']
    try:
        (files_downloaded, exceptions) = crawler.crawl(unit['targets'])
        LOGGER.info("Downloaded %s files", files_downloaded)
        if exceptions:
            LOGGER.error("found %s errors!", len(exceptions))
            for ex in exceptions:
                LOGGER.error(ex)
    except Exception as ex:
        raise ex


def main():
    started_at = time.monotonic()
    for unit in CONFIG.workload:
        worker(unit)
    elasped_time = time.monotonic() - started_at
import re

import requests
from lxml import html

from logger import logger
from stores.registration import register
from templates import eb_template

ebay_logger = logger.get_logger('Ebay', './logfile.log')


def convert_pages_url(url: str):
    """
    Given ebay Pages url, retrieve item page and continue parsing.
    If given standard url, immediately return it

    :param url: str, ebay url
    :return: str, /itm/ url; else None
    """
    if 'ebay.com/p/' in url:
        text = get_page(url).text
        base_url = 'https://www.ebay.com/itm/'
        pattern = '(?s)(?<=data-itemid=")(.*?)(?=")'
        item = re.search(pattern, text)
        try:
            item = item.group(0).strip()
        except AttributeError as e:
            ebay_logger.error(f'{e.__class__}:{e}')
            return None
        else:
            url = base_url + item
    return url
## be met: https://www.gnu.org/licenses/gpl-3.0.html.
##
## $QT_END_LICENSE$
##
#############################################################################

from string import Template
from time import sleep
from distutils.version import LooseVersion
from typing import Any, Dict, List, Optional, Tuple

import jira  # type: ignore

from config import Config
from git import FixedByTag
from logger import logger

log = logger.get_logger('jira')

comment_template = Template(
    """A change related to this issue (sha1 '$sha1') was integrated in '$repository' in the '$branch' branch.
This change will be in version: $fix_version - (JIRA: $version_id).

Subject: {{$subject}}"
""")


class JiraCloser:
    def __init__(self, config: Config) -> None:
        self.config = config
        self.jira_url = self.config.jira_url
        self.jira_client = jira.JIRA(self.jira_url, oauth=self.config.get_oauth_data())
class Flusher(object):
    """
    The flusher is responsible for translating Collectd metrics to CloudWatch
    MetricDataStatistic, batching, aggregating and flushing metrics to CloudWatch
    endpoints.

    Keyword arguments:
    config_helper -- The ConfigHelper object with configuration loaded
    """

    _LOGGER = get_logger(__name__)
    _FLUSH_INTERVAL_IN_SECONDS = 60
    _FLUSH_DELTA_IN_SECONDS = 1
    _MAX_METRICS_PER_PUT_REQUEST = 20
    _MAX_METRICS_TO_AGGREGATE = 2000

    def __init__(self, config_helper):
        self.lock = threading.Lock()
        self.client = None
        self.config = config_helper
        self.metric_map = {}
        self.last_flush_time = time.time()

    def add_metric(self, value_list):
        """
        Translates Collectd metrics to CloudWatch format and stores them in flusher
        for further processing such as batching and aggregating.

        Keyword arguments:
        value_list -- The ValueList object passed by Collectd to the write callback
        """
        with self.lock:
            # The flush operation should take place before adding metric for a new minute.
            # Together with flush delta this ensures that old metrics are flushed
            # before or at the start of a new minute.
            self._flush_if_need(time.time())
            if self.config.whitelist.is_whitelisted(self._get_metric_key(value_list)):
                self._aggregate_metric(value_list)

    def _flush_if_need(self, current_time):
        """ Checks if metrics should be flushed and starts the flush procedure """
        if self._is_flush_time(current_time):
            if self.config.debug and self.metric_map:
                state = ""
                for metric in self.metric_map:
                    state += str(metric) + "[" + \
                        str(self.metric_map[metric].statistics.sample_count) + "] "
                self._LOGGER.info("[debug] flushing metrics " + state)
            self._flush()

    def _is_flush_time(self, current_time):
        return (current_time - self.last_flush_time) + self._FLUSH_DELTA_IN_SECONDS >= \
            self._FLUSH_INTERVAL_IN_SECONDS

    def _aggregate_metric(self, value_list):
        """
        Selects existing metric or adds a new metric to the metric_map. Then aggregates
        values from ValueList with the selected metric. If the size of metric_map is
        above the limit, new metric will not be added and the value_list will be dropped.
        """
        key = self._get_metric_key(value_list)
        if key in self.metric_map:
            self._add_values_to_metric(self.metric_map[key], value_list)
        else:
            if len(self.metric_map) < self._MAX_METRICS_TO_AGGREGATE:
                metric = MetricDataBuilder(self.config, value_list).build()
                self.metric_map[key] = metric
                self._add_values_to_metric(metric, value_list)
            else:
                self._LOGGER.warning("Batching queue overflow detected. Dropping metric.")

    def _get_metric_key(self, value_list):
        """
        Generates key for the metric. The key must use both metric_name and plugin
        instance to ensure uniqueness.
        """
        return value_list.plugin + "-" + value_list.plugin_instance + "-" + \
            value_list.type + "-" + value_list.type_instance

    def _add_values_to_metric(self, metric, value_list):
        """ Aggregates values from value_list with existing metric """
        for value in value_list.values:
            metric.add_value(value)

    def _flush(self):
        """ Batches and puts metrics to CloudWatch """
        self.last_flush_time = time.time()
        self.client = PutClient(self.config)
        while self.metric_map:
            metric_batch = self._prepare_batch()
            self.client.put_metric_data(MetricDataStatistic.NAMESPACE, metric_batch)

    def _prepare_batch(self):
        """
        Removes metrics from the metric_map and adds them to the batch.
        The batch size is defined by _MAX_METRICS_PER_PUT_REQUEST.
        """
        metric_batch = []
        while len(metric_batch) < self._MAX_METRICS_PER_PUT_REQUEST and self.metric_map:
            key, metric = self.metric_map.popitem()
            metric_batch.append(metric)
        return metric_batch
def logged_class(class_reference):
    log = logger.get_logger(class_reference.__name__)
    class_reference.logger = log
    return class_reference
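# Hypothetical usage of the logged_class decorator above; the class name is
# illustrative. The decorator attaches a logger named after the class, so
# instances can call self.logger directly.
@logged_class
class Downloader:
    def fetch(self, url):
        self.logger.info('fetching %s', url)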