Example #1
class Post(Base):
    """
    Represents a reddit post with additional data
    :attr id: Integer, generated sql pk
    :attr reddit_fullname: str, reddit type identifier 't3_' + submission id ex. 'a4hafgh'
    :attr mpn: str, manufacturer part number for linked product
    :attr price: int, rounded price of product at date of instantiation
    :attr date: Date, date of instantiation
    :attr site: str, domain of linked product ex 'microcenter.com'
    """
    __tablename__ = 'posts'

    id = Column(Integer, primary_key=True)
    reddit_fullname = Column(String(15), nullable=False, unique=True)
    mpn = Column(String(30))
    price = Column(Integer)
    date = Column(Date)
    site = Column(String(50))

    post_logger = logger.get_logger('Post', './logfile.log')

    def __init__(
        self,
        reddit_fullname: str,
        mpn: str,
        price: int,
        date: datetime.date,
        site: str,
    ):
        self.reddit_fullname = reddit_fullname
        self.mpn = mpn
        self.price = price
        self.date = date
        self.site = site

    @validates('mpn', 'site')
    def validate_lengths(self, key, value):
        """
        For attributes named in the decorator, check the value against the
        column's max length and truncate if needed
        :param key: str, attribute name passed in from the decorator
        :param value: str, value passed in by sqlalchemy
        :return: str, the shorter of value and value[:max_len], further handled by sqlalchemy
        """
        max_len = getattr(self.__class__, key).prop.columns[0].type.length
        if value and len(value) > max_len:
            self.post_logger.warning(f'{key}: {value} - '
                                     f'violated max length and was truncated')
            return value[:max_len]
        return value

    def __repr__(self):
        return f'<Post ({self.date} - {self.reddit_fullname}, {self.mpn})>'
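
A minimal usage sketch (assuming the project's SQLAlchemy `Base` and logger imports are in scope): the `@validates` hook fires on plain attribute assignment, so an over-long `mpn` is truncated to the column's declared length before it ever reaches the database.

# Hedged usage example; all values are invented
import datetime

post = Post(
    reddit_fullname='t3_a4hafgh',
    mpn='X' * 40,                   # longer than String(30)
    price=250,
    date=datetime.date.today(),
    site='microcenter.com',
)
assert len(post.mpn) == 30          # truncated by validate_lengths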
Example #2
def main(args):
    config = ConfigParser(args)
    cfg = config.config
    logger = get_logger(config.log_dir, "train")

    validation_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    validation_dataset = CTImageLoaderTest(
        link_label_file=cfg["validation_data"],
        image_size=cfg["input_size"],
        root_folder=cfg["root_folder"],
        transforms=validation_transform)

    vali_loader = torch.utils.data.DataLoader(
        validation_dataset,
        batch_size=cfg["vali_batch_size"],
        shuffle=False,
        num_workers=cfg["workers"],
        drop_last=False)

    model = resnet50(number_class=3, pretrained=True)
    checkpoint = torch.load(cfg['resume'])
    state_dict = checkpoint['state_dict']
    model.load_state_dict(state_dict)
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()
    trans = transforms.ToPILImage()
    with torch.no_grad():
        for i, (data, target, links) in enumerate(vali_loader):
            data, target = data.to(device), target.to(device)

            output = model(data)
            _, pred = torch.max(output, dim=1)
            for j in range(len(links)):
                print(links[j])
                print(pred[j].item())
                print(target[j].item())
                # if pred[j].item() == target[j].item():
                #     continue
                image = data[j] * 0.5 + 0.5
                image = trans(image.cpu())
                # image = image.cpu().data.numpy()
                # print(image)
                image = np.array(image)
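
The `* 0.5 + 0.5` step above inverts the `Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))` transform before converting back to a PIL image; a standalone check of that arithmetic:

# Standalone check: Normalize maps x -> (x - 0.5) / 0.5, so y * 0.5 + 0.5 undoes it
import torch

x = torch.tensor([0.0, 0.5, 1.0])
normalized = (x - 0.5) / 0.5          # what transforms.Normalize applies
recovered = normalized * 0.5 + 0.5    # the inverse used in the loop above
assert torch.allclose(recovered, x)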
Example #3
import psycopg2.extras
from importlib import import_module

# `connect` and `logger` here are modules from the host project


def generate(name_type, language, amount=1):

  connection = connect.connect()
  cursor = connection.cursor(cursor_factory=psycopg2.extras.DictCursor)

  generator_module = import_module('tools.name_generator.' + language)
  generator = getattr(generator_module, name_type)(cursor)
  names = generator.generate(amount)
  sb_logger = logger.get_logger('name_generator')
  sb_logger.debug("Name generator called with name_type: {} language: {} amount: {}".format(
    name_type, language, amount))

  if len(names) == 0:
    sb_logger.warning("No names generated.")
    return False
  else:
    sb_logger.debug("{} names generated.".format(len(names)))
    return names
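
For context, a hedged call sketch: `generate` resolves its generator class dynamically, so `tools/name_generator/<language>.py` must define a class named after `name_type` exposing `generate(amount)`. The module and class names below are illustrative, not taken from the project.

# Illustrative only: assumes tools/name_generator/english.py defines
# a TownName class with a generate(amount) -> list method
names = generate('TownName', 'english', amount=5)
if names:
    for name in names:
        print(name)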
Example #4
import inspect

# `logger` here is a module from the host project


def write_log(level, message, request=None):

  frame = inspect.stack()[1]
  module = inspect.getmodule(frame[0])

  extras = {}
  extras['caller_module'] = module.__name__
  extras['caller_function'] = frame[3]

  if request is not None:
    if request.META.get('REMOTE_ADDR'):
      extras['ip'] = request.META.get('REMOTE_ADDR')
    if request.user.id:
      extras['user_id'] = request.user.id
    if request.META.get('HTTP_USER_AGENT'):
      extras['user_agent'] = request.META.get('HTTP_USER_AGENT')

  sb_logger = logger.get_logger(logger_name='frontend')
  severity_call = getattr(sb_logger, level)
  severity_call(message, extra=extras)
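
A hedged example of how a Django view might call `write_log`; the view itself is hypothetical, but the `request` attributes match what the function inspects (`META`, `user`).

# Hypothetical Django view; caller module/function are picked up via inspect
from django.http import HttpResponse

def profile_view(request):
    write_log('info', 'profile page requested', request=request)
    return HttpResponse('ok')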
Example #5
import requests

from lxml import html

from stores.registration import register
from logger import logger

frys_logger = logger.get_logger('Frys', './logfile.log')


def get_page(url: str):
    """Simple request based on url"""
    headers = {
        'DNT': '1',
        'Host': 'www.frys.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    return requests.get(url, headers=headers)


def get_price(tree: html.HtmlElement):
    """
    Parses price from page
    :param tree: html.HtmlElement from lxml
    :return: int, rounded, if exists; else None
    """
    path = '//span[@id="did_price1valuediv"]'
    try:
        price_tag = tree.xpath(path)[0].text
        price = int(round(float(price_tag[1:])))
    except IndexError as e:
        # hedged completion: log-and-return-None mirrors the other store parsers
        frys_logger.error(f'{e.__class__}: {e}')
        return None
    return price
Example #6
import re

from lxml import html
import requests

from stores.registration import register
from logger import logger

amazon_logger = logger.get_logger('Amazon', './logfile.log')


def get_page(url: str):
    """Simple request based on url"""
    headers = {
        'DNT': '1',
        'Host': 'www.amazon.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    return requests.get(url, headers=headers)


def get_xpath(path: str, tree: html.HtmlElement):
    """
    Looks for path/element in tree
    :param path: str, valid xpath search string
    :param tree: html.HtmlElement from lxml
    :return: element, based on path; or None if not found
    """
    try:
        return tree.xpath(path)[0]
    except IndexError:
        return None
Example #7
import worker
from shutil import rmtree
from time import sleep
from algthm.utils.file import dir_empty
from cfg.loader import cfg
from multiprocessing import Process
from logger import logger
from dex.core.db import MongoConnection
from dex.core.exceptions.indexer import IndexerBootFailure
from logging import CRITICAL, getLogger
from datetime import datetime
from elasticsearch import Elasticsearch, ElasticsearchException


logger.setup_logging()
logger = logger.get_logger('dex')
pika_logger = getLogger('pika')
pika_logger.setLevel(CRITICAL)


def initialize_workers(num_workers, target, daemon=True):
    """
    Initializes the worker processes.
    """
    workers = []
    process = None

    print '> initializing {} workers ..'.format(num_workers),

    for i in range(num_workers):
        try:
            # hedged completion: spawn and track one daemonized worker
            process = Process(target=target)
            process.daemon = daemon
            process.start()
            workers.append(process)
        except Exception:
            raise IndexerBootFailure('could not start worker {}'.format(i))
Example #8
import os
from utils.path_utils import get_app_data_path
from logger.logger import get_logger
from trainer import custom_accuracy
from trainer.decision_tree_classifier import DTClassifier
from trainer.random_forest_classifier import RFClassifier
from trainer.adaboost_classifier import AdaboostClassifier
from trainer.xgboost_classifier import XGBClassifier
from trainer.logistic_regression_classifier import LRClassifier
from trainer.knn_classifier import KNNClassifier
from trainer.ann_classifier import ANNClassifier
from non_ml.non_ml_classifier import NonMLClassifier
from utils.load_and_process import DataLoader
from sklearn.metrics import precision_score, recall_score, f1_score, balanced_accuracy_score, accuracy_score

logger = get_logger()


class TrainingDetails(object):
    def __init__(self, ds, ds_name, seed):
        self.ds = ds
        self.ds_name = ds_name
        self.seed = seed


def run_experiment(ds, experiment, timing_key, verbose, timings):
    """

    :param ds:
    :param experiment:
    :param timing_key:
    :param verbose:
    :param timings:
    """
Example #9
import sys
from logger import logger
log = logger.get_logger()
class GenericException(Exception):
    def __init__(self, message):
        self.message = message
        
        log.error(str(message) + "\n")

class ConfigError(GenericException):
    pass

class ParseError(GenericException):
    pass

Example #10
#!/usr/bin/python
# -*- coding: utf-8 -*-
# dht.py
"""
[#11] Add and implement the measurement of temperature and humidity
author: Thomas Kaulke, [email protected]
"""

from __future__ import absolute_import
import Adafruit_DHT
import conf.greenhouse_config as conf
import logger.logger as log

logging = log.get_logger()
lib = conf.lib
sensor = Adafruit_DHT.DHT22
pin = conf.DHT_PIN


def get_values():
    global temperature
    global humidity
    logging.info('Get temperature and humidity values.')
    humidity, temperature = Adafruit_DHT.read_retry(sensor, pin)

    if humidity is not None and temperature is not None:
        logging.info(('{0}{1}{2}'.format(conf.temp_format, lib.space,
                                         conf.hum_format)).format(
                                             temperature, humidity))
    else:
        logging.warning(
            'Failed to get temperature and humidity values.')  # message assumed
Example #11
import time

import praw

from logger import logger

logger = logger.get_logger('RedditHandler', './logfile.log')


class RedditHandler:
    @staticmethod
    def get_subreddit(sub_to_init: str):
        reddit = praw.Reddit()
        subreddit = reddit.subreddit(sub_to_init)
        return subreddit

    @staticmethod
    def reply_to_submission(submission: praw.Reddit.submission, markdown: str):
        """
        Attempts to post comment to reddit submission; sleeps
        and retries if ratelimit enforced by reddit
        :param submission: praw.Reddit.submission, submission to reply to
        :param markdown: str, formatted markdown for reddit
        :return: nothing
        """
        if markdown is not None:
            logger.info('attempting reply...')
            try:
                submission.reply(markdown)
            except praw.exceptions.APIException as e:
                logger.error(e.message)
Example #12
import re

import requests

from logger import logger
from stores.registration import register

newegg_logger = logger.get_logger('Newegg', './logfile.log')


def convert_mobile_url(url: str):
    """
    Check for m.newegg.com...
    :param url: str, mobile newegg url
    :return: str, non-mobile link
    """
    if 'm.newegg.com' in url:
        base_url = 'https://www.newegg.com/Product/Product.aspx?Item='
        pattern = '(?s)(?<=products/)[A-Za-z0-9]*'
        item = re.search(pattern, url)
        try:
            item = item.group(0).strip()
        except AttributeError as e:
            newegg_logger.error(f'{e.__class__}: {e}')
            return None
        else:
            url = base_url + item
    return url
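
A quick sanity check of the conversion (the item number is invented):

# Illustrative: 'N82E16819113496' is a made-up item number
mobile = 'https://m.newegg.com/products/N82E16819113496'
print(convert_mobile_url(mobile))
# -> https://www.newegg.com/Product/Product.aspx?Item=N82E16819113496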


def get_page(url: str):
    """Simple request based on url"""
    # completion mirrors the get_page pattern in the other store modules
    headers = {
        'DNT': '1',
        'Host': 'www.newegg.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    return requests.get(url, headers=headers)
Example #13
import re
import requests

from lxml import html

from stores.registration import register
from logger import logger

bestbuy_logger = logger.get_logger('BestBuy', './logfile.log')


def get_page(url: str):
    """Simple request based on url"""
    headers = {
        'DNT': '1',
        'Host': 'www.bestbuy.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    return requests.get(url, headers=headers)


def get_price(text: str):
    """
    Parses for price
    :param text: str, from requests.get().text
    :return: int, rounded, if exists; else None
    """
    pattern = '(?<=customerPrice":)(.*?)(?=,)'
    data = re.search(pattern, text)
    try:
        data = data.group(0).strip()
    except AttributeError as e:
        bestbuy_logger.error(f'{e.__class__}: {e}')
        return None
    return int(round(float(data)))
Example #14
import re
import requests

from lxml import html

from stores.registration import register
from logger import logger


bestbuy_logger = logger.get_logger('BestBuy', './logfile.log')


def get_page(url: str):
    """Simple request based on url"""
    headers = {
        'DNT': '1',
        'Host': 'www.bestbuy.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    return requests.get(url, headers=headers)


def get_price(text: str):
    """
    Parses for price
    :param text: str, from requests.get().text
    :return: int, rounded, if exists; else None
    """
    pattern = '(?<=customerPrice":)(.*?)(?=,)'
    data = re.search(pattern, text)
    try:
        data = data.group(0).strip()
    except AttributeError as e:
        bestbuy_logger.error(f'{e.__class__}: {e}')
        return None
    return int(round(float(data)))
Example #15
class Flusher(object):
    """
    The flusher is responsible for translating Collectd metrics to CloudWatch MetricDataStatistic, 
    batching, aggregating and flushing metrics to CloudWatch endpoints.
    
    Keyword arguments:
    config_helper -- The ConfigHelper object with configuration loaded
    """
    
    _LOGGER = get_logger(__name__)
    _FLUSH_INTERVAL_IN_SECONDS = 60
    _FLUSH_DELTA_IN_SECONDS = 1 
    _MAX_METRICS_PER_PUT_REQUEST = 20
    _MAX_METRICS_TO_AGGREGATE = 2000 

    def __init__(self, config_helper):
        self.lock = threading.Lock()
        self.client = None
        self.config = config_helper
        self.metric_map = {}
        self.last_flush_time = time.time()
        self.nan_key_set = set()
        self.enable_high_resolution_metrics = config_helper.enable_high_resolution_metrics
        self.flush_interval_in_seconds = int(config_helper.flush_interval_in_seconds if config_helper.flush_interval_in_seconds else self._FLUSH_INTERVAL_IN_SECONDS)
        self.max_metrics_to_aggregate = self._MAX_METRICS_PER_PUT_REQUEST if self.enable_high_resolution_metrics else self._MAX_METRICS_TO_AGGREGATE
        self.client = PutClient(self.config)

    def is_numerical_value(self, value):
        """
        Assumes the value passed from collectd to this plugin is a float or an
        integer. If collectd ever passes strings through this interface,
        _add_values_to_metrics should be updated to convert string values to
        floats.

        Returns:
            True if the value parses as a float and is not NaN
            False if the value is NaN or cannot be parsed as a float
        """
        try:
            return not math.isnan(float(value))
        except ValueError:
            return False

    def add_metric(self, value_list):
        """
        Translates Collectd metrics to CloudWatch format and stores them in flusher for further processing
        such as batching and aggregating.

        Keyword arguments:
        value_list -- The ValueList object passed by Collectd to the write callback
        """
        with self.lock:
            # The flush operation should take place before adding metric for a new minute.
            # Together with flush delta this ensures that old metrics are flushed before or at the start of a new minute.
            self._flush_if_need(time.time())
            if self.config.whitelist.is_whitelisted(self._get_metric_key(value_list)):
                self._aggregate_metric(value_list)

    def _flush_if_need(self, current_time):
        """ 
        Checks if metrics should be flushed and starts the flush procedure
        """
        if self._is_flush_time(current_time):
            if self.config.debug and self.metric_map:
                state = ""
                for dimension_metrics in self.metric_map:
                    state += str(dimension_metrics) + "[" + str(self.metric_map[dimension_metrics][0].statistics.sample_count) + "] "
                self._LOGGER.info("[debug] flushing metrics " + state)
            self._flush()
    
    def _is_flush_time(self, current_time):
        if self.enable_high_resolution_metrics:
            return (current_time - self.last_flush_time) >= self.flush_interval_in_seconds + self._FLUSH_DELTA_IN_SECONDS
        return (current_time - self.last_flush_time) + self._FLUSH_DELTA_IN_SECONDS >= self.flush_interval_in_seconds

    def record_nan_value(self, key, value_list):
        if key not in self.nan_key_set:
            self._LOGGER.warning(
                "Adding Metric value is not numerical, key: " + key + " value: " + str(value_list.values))
            self.nan_key_set.add(key)

    def _aggregate_metric(self, value_list):
        """
        Selects existing metric or adds a new metric to the metric_map. Then aggregates values from ValueList with the selected metric.
        If the size of metric_map is above the limit, new metric will not be added and the value_list will be dropped.
        """
        nan_value_count = 0
        dimension_key = self._get_metric_key(value_list)
        adjusted_time = int(value_list.time)

        key = dimension_key
        if self.enable_high_resolution_metrics:
            key = dimension_key + "-" + str(adjusted_time)
        if key in self.metric_map:
            nan_value_count = self._add_values_to_metrics(self.metric_map[key], value_list)
        else:
            if len(self.metric_map) < self.max_metrics_to_aggregate:
                nan_value_count = self._add_metric_to_queue(value_list, adjusted_time, key)
            else:
                if self.enable_high_resolution_metrics:
                    if self.config.debug and self.metric_map:
                        state = ""
                        for dimension_metrics in self.metric_map:
                            state += str(dimension_metrics) + "[" + str(self.metric_map[dimension_metrics][0].statistics.sample_count) + "] "
                        self._LOGGER.info("[debug] flushing metrics " + state)
                    self._flush()
                    nan_value_count = self._add_metric_to_queue(value_list, adjusted_time, key)
                else:
                    self._LOGGER.warning("Batching queue overflow detected. Dropping metric.")
        if nan_value_count:
            self.record_nan_value(dimension_key, value_list)

    def _add_metric_to_queue(self, value_list, adjusted_time, key):
        nan_value_count = 0
        metrics = MetricDataBuilder(self.config, value_list, adjusted_time).build()
        nan_value_count = self._add_values_to_metrics(metrics, value_list)
        if nan_value_count != len(value_list.values):
            self.metric_map[key] = metrics
        return nan_value_count

    def _get_metric_key(self, value_list):
        """
        Generates key for the metric. The key must use both metric_name and plugin instance to ensure uniqueness.
        """ 
        return value_list.plugin + "-" + value_list.plugin_instance + "-" + value_list.type + "-" + value_list.type_instance

    def _add_values_to_metrics(self, dimension_metrics, value_list):
        """
        Aggregates values from value_list with existing metric
        Add the valid value to the metric and just skip the nan value.

        Returns:
            return the count of the nan value in value_list
        """
        
        # Count NaNs once up front so the count is defined even when
        # dimension_metrics is empty; every metric sees the same value_list.
        nan_value_count = sum(
            1 for value in value_list.values
            if not self.is_numerical_value(value))
        for metric in dimension_metrics:
            for value in value_list.values:
                if self.is_numerical_value(value):
                    metric.add_value(value)
        return nan_value_count

    def _flush(self):
        """
        Batches and puts metrics to CloudWatch
        """
        self.last_flush_time = time.time()
        metric_map_size = len(self.metric_map)
        if self.metric_map:
            prepare_batch = self._prepare_batch()
            try:
                while True:
                    metric_batch = prepare_batch.next()
                    if not metric_batch:
                        break
                    self.client.put_metric_data(MetricDataStatistic.NAMESPACE, metric_batch)
                    if len(metric_batch) < self._MAX_METRICS_PER_PUT_REQUEST:
                        break
            except StopIteration, e:
                if metric_map_size % self._MAX_METRICS_PER_PUT_REQUEST != 0 or len(self.metric_map) != 0:
                    self._LOGGER.error("_flush error: " + str(e) + "  Original map size: " + str(metric_map_size))
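
The two `_is_flush_time` branches place the one-second delta on opposite sides of the comparison; a standalone arithmetic check (constants inlined from the class above) shows that standard resolution flushes one second before the interval elapses while high resolution flushes one second after it.

# Standalone check of the two _is_flush_time formulas
FLUSH_INTERVAL = 60
FLUSH_DELTA = 1

def standard_flush(elapsed_seconds):
    # (current - last_flush) + delta >= interval  ->  fires at 59s
    return elapsed_seconds + FLUSH_DELTA >= FLUSH_INTERVAL

def high_res_flush(elapsed_seconds):
    # (current - last_flush) >= interval + delta  ->  fires at 61s
    return elapsed_seconds >= FLUSH_INTERVAL + FLUSH_DELTA

assert standard_flush(59) and not standard_flush(58)
assert high_res_flush(61) and not high_res_flush(60)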
Example #16
def main():
    logger = get_logger(__name__)
    logger.info("hello project one!")
    lib_one_main()
Example #17
import re
import requests

from lxml import html

from logger import logger
from stores.registration import register
from templates import eb_template


ebay_logger = logger.get_logger('Ebay', './logfile.log')


def convert_pages_url(url: str):
    """
    Given ebay Pages url, retrieve item page and continue parsing.  If
    given standard url, immediately return it
    :param url: str, ebay url
    :return: str, /itm/ url; else None
    """
    if 'ebay.com/p/' in url:
        text = get_page(url).text
        base_url = 'https://www.ebay.com/itm/'
        pattern = '(?s)(?<=data-itemid=")(.*?)(?=")'
        item = re.search(pattern, text)
        try:
            item = item.group(0).strip()
        except AttributeError as e:
            ebay_logger.error(f'{e.__class__}:{e}')
            return None
        else:
            url = base_url + item
    return url
Example #18
"""
Entry Point for the Search API.
"""

import tornado.web
from search.controllers import DefaultHandler
from search.controllers import query_handler
from search.controllers import metric_handler
from search.controllers import autosuggest
from logger import logger
from cfg.loader import cfg

# ------------------------------------------------------------------------------
#   Configure Logging
# ------------------------------------------------------------------------------
logger.setup_logging()
logger = logger.get_logger('search_api')

# ------------------------------------------------------------------------------
#   Configure Application
# ------------------------------------------------------------------------------
application = tornado.web.Application(
    [
        (r"/", DefaultHandler),
        (r"/query", query_handler.QueryHandler),
        (r"/metrics/([A-z0-9]+)", metric_handler.MetricHandler),
        (r"/_auto", autosuggest.AutoSuggest),
    ],
    debug=True)


def main():
    # hedged completion: typical Tornado startup; the port value is assumed
    import tornado.ioloop
    application.listen(8888)
    tornado.ioloop.IOLoop.current().start()
Example #19
import json
import re
import requests

from lxml import html

from stores.registration import register
from templates import mc_template
from logger import logger


mc_logger = logger.get_logger('Microcenter', './logfile.log')


def strip_url(url: str):
    """
    Given a Microcenter URL, if query string is present, strip it and return
    stripped string.  Titles of products should not contain '?'
    :param url: str, microcenter url
    :return: str, stripped of query string as necessary
    """
    if 'storeID=' in url:
        begin_id = url.find('storeID=')
        end_id = begin_id + 11  # len('storeID=095')
        url = url[:begin_id] + url[end_id:]
    mc_logger.info(f'url: {url}')
    return url
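
A hedged example of the stripping (the product path is invented); note that only the 11-character 'storeID=095' fragment is removed, so a trailing '?' can survive:

# Illustrative URL; only the storeID fragment matters here
url = 'https://www.microcenter.com/product/123456/some-gpu?storeID=095'
print(strip_url(url))
# -> https://www.microcenter.com/product/123456/some-gpu?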


def get_page(url: str, store_num: str = '095'):
    """
    Given a Microcenter URL, return request object
    """
    # hedged completion: headers mirror the other store modules;
    # selecting the store via cookie is an assumption
    headers = {
        'DNT': '1',
        'Host': 'www.microcenter.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    cookies = {'storeSelected': store_num}
    return requests.get(url, headers=headers, cookies=cookies)
Example #20
import time

import praw

from logger import logger


logger = logger.get_logger('RedditHandler', './logfile.log')


class RedditHandler:

    @staticmethod
    def get_subreddit(sub_to_init: str):
        reddit = praw.Reddit()
        subreddit = reddit.subreddit(sub_to_init)
        return subreddit

    @staticmethod
    def reply_to_submission(submission: praw.Reddit.submission, markdown: str):
        """
        Attempts to post comment to reddit submission; sleeps
        and retries if ratelimit enforced by reddit
        :param submission: praw.Reddit.submission, submission to reply to
        :param markdown: str, formatted markdown for reddit
        :return: nothing
        """
        if markdown is not None:
            logger.info('attempting reply...')
            try:
                submission.reply(markdown)
            except praw.exceptions.APIException as e:
                logger.error(e.message)
Example #21
def __init__(self, sub_to_stream: str):
    self.logger = logger.get_logger('Bot', './logfile.log')
    self.logger.info(f'initializing on {sub_to_stream}...')
    self.subreddit = RedditHandler.get_subreddit(sub_to_stream)
    self.logger.info('initialized')
Example #22
from flask import Flask, Response, request
import json
from logger.logger import get_logger
from config.config import LOGGER_CONFIG
from handlers.http import HTTPHandler
from handlers.predict import ModelPredict
from handlers import model

app = Flask(__name__)
logger = get_logger(LOGGER_CONFIG)
model = model.load(logger)


@app.route('/<SERVICE-PREFIX>/health', methods=['GET'])
def health():
    res = {"message": "I am alive"}
    return Response(json.dumps(res), 200, mimetype='application/json')


@app.route('/<SERVICE-PREFIX>/v1/predict', methods=['POST'])
def predict():
    req = HTTPHandler(request.get_json(), request.headers.get('X-Request-ID'))

    req_id, roi, err, res_stat = req.validate(logger)
    if err is not None:
        return Response(err, mimetype='application/json', status=res_stat.code)

    pred = ModelPredict(model, roi)
    res, err, res_stat = pred.run(req_id, logger)
    if err is not None:
        return Response(err, mimetype='application/json', status=res_stat.code)

    # hedged completion: the success path returns the prediction payload
    return Response(json.dumps(res), 200, mimetype='application/json')
Example #23
def main():
    logger = get_logger(__name__)
    logger.info("hello lib one")
Example #24
import requests

from lxml import etree, html

from stores.registration import register
from logger import logger


frys_logger = logger.get_logger('Frys', './logfile.log')


def get_page(url: str):
    """Simple request based on url"""
    headers = {
        'DNT': '1',
        'Host': 'www.frys.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    return requests.get(url, headers=headers)


def get_price(tree: html.HtmlElement):
    """
    Parses price from page
    :param tree: html.HtmlElement from lxml
    :return: int, rounded, if exists; else None
    """
    path = '//span[@id="did_price1valuediv"]'
    try:
        price_tag = tree.xpath(path)[0].text
        price = int(round(float(price_tag[1:])))
    except IndexError as e:
        # hedged completion: log-and-return-None mirrors the other store parsers
        frys_logger.error(f'{e.__class__}: {e}')
        return None
    return price
Example #25
def logger():
    return get_logger()
Example #26
import json
import re
import requests

from lxml import html

from stores.registration import register
from templates import mc_template
from logger import logger

mc_logger = logger.get_logger('Microcenter', './logfile.log')


def strip_url(url: str):
    """
    Given a Microcenter URL, if query string is present, strip it and return
    stripped string.  Titles of products should not contain '?'
    :param url: str, microcenter url
    :return: str, stripped of query string as necessary
    """
    if 'storeID=' in url:
        begin_id = url.find('storeID=')
        end_id = begin_id + 11  # len('storeID=095')
        url = url[:begin_id] + url[end_id:]
    mc_logger.info(f'url: {url}')
    return url


def get_page(url: str, store_num: str = '095'):
    """
    Given a Microcenter URL, return request object
    """
    # hedged completion: headers mirror the other store modules;
    # selecting the store via cookie is an assumption
    headers = {
        'DNT': '1',
        'Host': 'www.microcenter.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    cookies = {'storeSelected': store_num}
    return requests.get(url, headers=headers, cookies=cookies)
Example #27
def main(args):
    config = ConfigParser(args)
    cfg = config.config
    logger = get_logger(config.log_dir, "train")

    train_dataset = MRIBrainSegmentation(root_folder=cfg['root_folder'],
                                         image_label=cfg['train_data'],
                                         is_train=True,
                                         ignore_label=0,
                                         input_size=cfg['input_size'])
    vali_dataset = MRIBrainSegmentation(root_folder=cfg['root_folder'],
                                        image_label=cfg['validation_data'],
                                        is_train=False,
                                        ignore_label=0,
                                        input_size=cfg['input_size'])

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg["train_batch_size"],
        shuffle=True,
        num_workers=cfg["workers"],
        drop_last=True)

    vali_loader = torch.utils.data.DataLoader(
        vali_dataset,
        batch_size=cfg["vali_batch_size"],
        shuffle=False,
        num_workers=cfg["workers"],
        drop_last=False)
    if cfg['net_name'] == "deeplab":
        model = DeepLab(num_classes=1,
                        backbone=cfg['backbone'],
                        output_stride=cfg['output_stride'],
                        sync_bn=cfg['sync_bn'],
                        freeze_bn=cfg['freeze_bn'])
    else:
        model = Unet(in_channels=3, out_channels=1, init_features=32)

    criterion = getattr(loss, 'dice_loss')
    optimizer = optim.SGD(model.parameters(),
                          lr=cfg["lr"],
                          momentum=0.9,
                          weight_decay=cfg["weight_decay"])
    metrics_name = []
    scheduler = Poly_Scheduler(base_lr=cfg['lr'],
                               num_epochs=config['epoch'],
                               iters_each_epoch=len(train_loader))
    trainer = Trainer(model=model,
                      criterion=criterion,
                      optimizer=optimizer,
                      train_loader=train_loader,
                      nb_epochs=config['epoch'],
                      valid_loader=vali_loader,
                      lr_scheduler=scheduler,
                      logger=logger,
                      log_dir=config.save_dir,
                      metrics_name=metrics_name,
                      resume=config['resume'],
                      save_dir=config.save_dir,
                      device="cuda:0",
                      monitor="max iou_class_1",
                      early_stop=-1)
    trainer.train()
Example #28
import requests

from stores.registration import register
from logger import logger

# TODO change 'store_name' to name of store being parsed
store_name_logger = logger.get_logger('Store_name', './logfile.log')


"""
The below functions are just suggestions; they do not have to be
implemented in exactly this manner, or at all
"""


def get_html(url: str):
    # do something
    pass


def extract_from_html(pattern: str, html: str):
    # do something
    pass


def get_mpn(html: str):
    # do something
    pass


def get_price(html: str):
    # do something
    pass
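
As a concrete starting point for this template, a hedged sketch of `extract_from_html` built on `re`; the error handling mirrors the other store parsers in this collection.

import re

def extract_from_html(pattern: str, html: str):
    # regex-based extraction, in the style of the other store modules
    match = re.search(pattern, html)
    try:
        return match.group(0).strip()
    except AttributeError as e:
        store_name_logger.error(f'{e.__class__}: {e}')
        return None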
Example #29
import re
import requests

from stores.registration import register
from logger import logger

rakuten_logger = logger.get_logger('Rakuten', './logfile.log')


def get_page(url: str):
    """Simple request based on url"""
    headers = {
        'DNT': '1',
        'Host': 'www.rakuten.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    return requests.get(url, headers=headers)


def get_price(text: str):
    """
    Parses for price
    :param text: str, from requests.get().text
    :return: int, rounded, if exists; else None
    """
    pattern = '(?<="price" content=")(.*?)(?="/>)'
    data = re.search(pattern, text)
    try:
        data = data.group(0).strip()
    except AttributeError as e:
        # Combo deals/splash pages/etc
        rakuten_logger.error(f'{e.__class__}: {e}')
        return None
    return int(round(float(data)))
Example #30
import pandas as pd


# Import user libraries
from configuration import Configuration
from db.sql_metadata_service import SQLMetadataService
from logger.logger import get_logger
from etl.ETL import ETL

# Get the configuration object to access config variables
configuration = Configuration()

# Create logger object with given configuration
logger = get_logger(
    logging_level=configuration.get_logging_level(),
    logs_output_file_path=configuration.get_logs_output_file_path(),
    logs_rotate_when=configuration.get_logs_rotate_when(),
    logs_rotate_backup_count=configuration.get_logs_rotate_backup_count()
)

def main(argv):
    """ Main function and the entry point of the lending club loan ETL application.

    Parameters
    ----------
    argv (Type: str list): Command line arguments

    Returns
    ----------
    None
    """
Example #31
import argparse
import os
import random
import time

from logger import logger

log = logger.get_logger(__name__)

# python -m tools.youget.download
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("url", help="video url")
    parser.add_argument("-o",
                        "--output",
                        help="output folder for downloading",
                        default=".")
    parser.add_argument("-v",
                        "--verbose",
                        help="verbose output",
                        action="store_true")  # flag, if specified True
    args = parser.parse_args()
    output = os.path.abspath(args.output)
    log.info(f"start downloading video on {args.url} to {output}")
    os.chdir(output)  # change working dir
    for n in range(144, 149):
        cmd = 'you-get --format=dash-flv480 https://www.bilibili.com/video/BV1w7411v74u?p=' + str(
            n)
        log.info("executing: " + cmd)
        time.sleep(random.randint(0, 3))  # mimic human behavior
        res = os.system(cmd)
Example #32
#!/usr/bin/env python3

import time

from crawler.config import CrawlerConfig
from crawler.crawler import Crawler
from logger.logger import get_logger

LOGGER = get_logger()
CONFIG = CrawlerConfig(LOGGER, max_depth=1)


def worker(unit: dict):
    crawler: Crawler = unit['crawler']
    try:
        (files_downloaded, exceptions) = crawler.crawl(unit['targets'])
        LOGGER.info("Downloaded %s files", files_downloaded)
        if exceptions:
            LOGGER.error("found %s errors!", len(exceptions))
            for ex in exceptions:
                LOGGER.error(ex)
    except Exception as ex:
        raise ex


def main():
    started_at = time.monotonic()
    for unit in CONFIG.workload:
        worker(unit)
    elapsed_time = time.monotonic() - started_at
    LOGGER.info("finished in %.2f seconds", elapsed_time)  # final log assumed
Example #33
import re
import requests

from lxml import html

from logger import logger
from stores.registration import register
from templates import eb_template

ebay_logger = logger.get_logger('Ebay', './logfile.log')


def convert_pages_url(url: str):
    """
    Given ebay Pages url, retrieve item page and continue parsing.  If
    given standard url, immediately return it
    :param url: str, ebay url
    :return: str, /itm/ url; else None
    """
    if 'ebay.com/p/' in url:
        text = get_page(url).text
        base_url = 'https://www.ebay.com/itm/'
        pattern = '(?s)(?<=data-itemid=")(.*?)(?=")'
        item = re.search(pattern, text)
        try:
            item = item.group(0).strip()
        except AttributeError as e:
            ebay_logger.error(f'{e.__class__}:{e}')
            return None
        else:
            url = base_url + item
    return url
Example #34
## be met: https://www.gnu.org/licenses/gpl-3.0.html.
##
## $QT_END_LICENSE$
##
#############################################################################

from string import Template
from time import sleep
from distutils.version import LooseVersion
from typing import Any, Dict, List, Optional, Tuple
import jira  # type: ignore
from config import Config
from git import FixedByTag
from logger import logger

log = logger.get_logger('jira')

comment_template = Template(
    """A change related to this issue (sha1 '$sha1') was integrated in '$repository' in the '$branch' branch.
This change will be in version: $fix_version - (JIRA: $version_id).
Subject: {{$subject}}"
""")


class JiraCloser:
    def __init__(self, config: Config) -> None:
        self.config = config
        self.jira_url = self.config.jira_url
        self.jira_client = jira.JIRA(self.jira_url,
                                     oauth=self.config.get_oauth_data())
Example #35
class Flusher(object):
    """
    The flusher is responsible for translating Collectd metrics to CloudWatch MetricDataStatistic, 
    batching, aggregating and flushing metrics to CloudWatch endpoints.
    
    Keyword arguments:
    config_helper -- The ConfigHelper object with configuration loaded
    """

    _LOGGER = get_logger(__name__)
    _FLUSH_INTERVAL_IN_SECONDS = 60
    _FLUSH_DELTA_IN_SECONDS = 1
    _MAX_METRICS_PER_PUT_REQUEST = 20
    _MAX_METRICS_TO_AGGREGATE = 2000

    def __init__(self, config_helper):
        self.lock = threading.Lock()
        self.client = None
        self.config = config_helper
        self.metric_map = {}
        self.last_flush_time = time.time()

    def add_metric(self, value_list):
        """
        Translates Collectd metrics to CloudWatch format and stores them in flusher for further processing
        such as batching and aggregating.

        Keyword arguments:
        value_list -- The ValueList object passed by Collectd to the write callback
        """
        with self.lock:
            # The flush operation should take place before adding metric for a new minute.
            # Together with flush delta this ensures that old metrics are flushed before or at the start of a new minute.
            self._flush_if_need(time.time())
            if self.config.whitelist.is_whitelisted(
                    self._get_metric_key(value_list)):
                self._aggregate_metric(value_list)

    def _flush_if_need(self, current_time):
        """ 
        Checks if metrics should be flushed and starts the flush procedure
        """
        if self._is_flush_time(current_time):
            if self.config.debug and self.metric_map:
                state = ""
                for metric in self.metric_map:
                    state += str(metric) + "[" + str(
                        self.metric_map[metric].statistics.sample_count) + "] "
                self._LOGGER.info("[debug] flushing metrics " + state)
            self._flush()

    def _is_flush_time(self, current_time):
        return (
            current_time - self.last_flush_time
        ) + self._FLUSH_DELTA_IN_SECONDS >= self._FLUSH_INTERVAL_IN_SECONDS

    def _aggregate_metric(self, value_list):
        """
        Selects existing metric or adds a new metric to the metric_map. Then aggregates values from ValueList with the selected metric.
        If the size of metric_map is above the limit, new metric will not be added and the value_list will be dropped.
        """
        key = self._get_metric_key(value_list)
        if key in self.metric_map:
            self._add_values_to_metric(self.metric_map[key], value_list)
        else:
            if len(self.metric_map) < self._MAX_METRICS_TO_AGGREGATE:
                metric = MetricDataBuilder(self.config, value_list).build()
                self.metric_map[key] = metric
                self._add_values_to_metric(metric, value_list)
            else:
                self._LOGGER.warning(
                    "Batching queue overflow detected. Dropping metric.")

    def _get_metric_key(self, value_list):
        """
        Generates key for the metric. The key must use both metric_name and plugin instance to ensure uniqueness.
        """
        return value_list.plugin + "-" + value_list.plugin_instance + "-" + value_list.type + "-" + value_list.type_instance

    def _add_values_to_metric(self, metric, value_list):
        """
        Aggregates values from value_list with existing metric
        """
        for value in value_list.values:
            metric.add_value(value)

    def _flush(self):
        """
        Batches and puts metrics to CloudWatch
        """
        self.last_flush_time = time.time()
        self.client = PutClient(self.config)
        while self.metric_map:
            metric_batch = self._prepare_batch()
            self.client.put_metric_data(MetricDataStatistic.NAMESPACE,
                                        metric_batch)

    def _prepare_batch(self):
        """
        Removes metrics from the metric_map and adds them to the batch. 
        The batch size is defined by _MAX_METRICS_PER_PUT_REQUEST.
        """
        metric_batch = []
        while len(metric_batch
                  ) < self._MAX_METRICS_PER_PUT_REQUEST and self.metric_map:
            key, metric = self.metric_map.popitem()
            metric_batch.append(metric)
        return metric_batch
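
The draining pattern in `_prepare_batch` is just repeated `popitem` up to the batch cap; a standalone sketch of the same loop with plain dicts:

# Standalone illustration of the popitem batching used by _prepare_batch
MAX_PER_BATCH = 20
metric_map = {'key-%d' % i: i for i in range(45)}

batches = []
while metric_map:
    batch = []
    while len(batch) < MAX_PER_BATCH and metric_map:
        _, metric = metric_map.popitem()
        batch.append(metric)
    batches.append(batch)

assert [len(b) for b in batches] == [20, 20, 5]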
Example #36
import re

import requests

from logger import logger
from stores.registration import register


newegg_logger = logger.get_logger('Newegg', './logfile.log')


def convert_mobile_url(url: str):
    """
    Check for m.newegg.com...
    :param url: str, mobile newegg url
    :return: str, non-mobile link
    """
    if 'm.newegg.com' in url:
        base_url = 'https://www.newegg.com/Product/Product.aspx?Item='
        pattern = '(?s)(?<=products/)[A-Za-z0-9]*'
        item = re.search(pattern, url)
        try:
            item = item.group(0).strip()
        except AttributeError as e:
            newegg_logger.error(f'{e.__class__}: {e}')
            return None
        else:
            url = base_url + item
    return url

Example #37
import re
import requests

from stores.registration import register
from logger import logger


rakuten_logger = logger.get_logger('Rakuten', './logfile.log')


def get_page(url: str):
    """Simple request based on url"""
    headers = {
        'DNT': '1',
        'Host': 'www.rakuten.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    return requests.get(url, headers=headers)


def get_price(text: str):
    """
    Parses for price
    :param text: str, from requests.get().text
    :return: int, rounded, if exists; else None
    """
    pattern = '(?<="price" content=")(.*?)(?="/>)'
    data = re.search(pattern, text)
    try:
        data = data.group(0).strip()
    except AttributeError as e:
        # Combo deals/splash pages/etc
        rakuten_logger.error(f'{e.__class__}: {e}')
        return None
    return int(round(float(data)))
Example #38
from logger import logger


def logged_class(class_reference):
    log = logger.get_logger(class_reference.__name__)
    class_reference.logger = log
    return class_reference
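
A hedged usage sketch: the decorator attaches a class-level logger named after the class, so instances can log via `self.logger`.

# Hypothetical class for illustration
@logged_class
class Fetcher:
    def fetch(self):
        self.logger.info('fetching...')

Fetcher().fetch()  # logs under the 'Fetcher' logger name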