def main():
    global logger

    args = parser.parse_args()

    if args.evaluate:
        args.save = os.path.join(os.path.dirname(args.resume), "linear", os.path.basename(args.resume), args.eval_data)
        os.makedirs(args.save, exist_ok=True)
        logger = get_logger(logpath=os.path.join(args.save, 'logs'), filepath=os.path.abspath(__file__))
    else:
        os.makedirs(args.save, exist_ok=True)
        logger = get_logger(logpath=os.path.join(args.save, 'logs'), filepath=os.path.abspath(__file__))

    logger.info(args)

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    main_worker(args)
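
Note: get_logger in these snippets is each project's own helper from a local tools module, not a standard-library function, so its signature differs from example to example. For the call shape used above, get_logger(logpath=..., filepath=...), a minimal compatible factory might look like the following sketch; the names, defaults, and behavior are assumptions, not the original implementation.

import logging
import os


def get_logger(logpath, filepath, displaying=True, saving=True):
    # Hypothetical factory: returns a logger that writes to `logpath` and
    # records which script (`filepath`) produced the log.
    logger = logging.getLogger(logpath)
    logger.setLevel(logging.INFO)
    if saving:
        os.makedirs(os.path.dirname(logpath) or '.', exist_ok=True)
        logger.addHandler(logging.FileHandler(logpath))
    if displaying:
        logger.addHandler(logging.StreamHandler())
    logger.info('logging for %s', filepath)
    return logger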
Example #2
 def __init__(self, target_id):
     super().__init__(target_id)
     self.module = 'Youtube'
     self.api_key = api_key
     # quality settings
     self.database = Database('Queues')
     self.logger = get_logger('Youtube')
Example #3
def parse_opts(opts):
	global tcp_port,udp_port,http_port,logger
	
	#create a logger
	logger =  get_logger()
	for arg,param in opts:
		if arg == '-h':
			usage()
			sys.exit(0)
		elif arg == '-u': 
			try:
				udp_port = int(param)
			except Exception as e:
				usage()
				sys.exit(0)
		elif arg == '-p':
			try:
				tcp_port = int(param)
			except Exception as e:
				usage()
				sys.exit(0)		
		elif arg == '-t':
			try:
				http_port = int(param)
			except Exception as e:
				usage()
				sys.exit(0)	
Example #4
def parse_opts(opts):
	global tcp_port,udp_port,host,logger,global_options
	#create a logger
	logger =  get_logger()
	for arg,param in opts:
		if arg == '-h':
			usage()
			sys.exit(0)
		elif arg == '-u': 
			try:
				udp_port = int(param)
				global_options["run_mode"] = "udp_mode"
			except Exception as e:
				usage()
				sys.exit(0)
		elif arg == '-t':
			try:
				tcp_port = int(param)
				global_options["run_mode"] = "tcp_mode"
			except Exception as e:
				usage()
				sys.exit(0)
		elif arg == '-r':
			try:
				host = param.strip()
			except Exception as e:
				usage()
				sys.exit(0)
def main(param_file=None):

    # setup
    p, base_path, output_dir = tools.setup(param_file)
    logger = tools.get_logger('gensim', os.path.join(output_dir, "run.log"))
    logger.info("running %s" % ' '.join(sys.argv))

    preprocess = []

    if 'stoplist' in p.as_dict():
        stoplist = open(path.join(base_path, p['stoplist'])).readlines()
        stoplist = [unicode(s.strip(), encoding='utf-8').lower() for s in stoplist]
        def remove_stopwords(sentence):
            return [word for word in sentence if not word in stoplist]
        preprocess.append(remove_stopwords)

    if 'stemmer' in p.as_dict():
        stemmer = Stemmer.Stemmer(p['stemmer'])
        preprocess.append(stemmer.stemWords)

    if p['input'].endswith('.json'):
        cor = JsonCorpus(path.join(base_path, p['input']),
                         no_below=p['no_below'],
                         no_above=p['no_above'],
                         preprocess=preprocess)
    else:
        cor = TextFilesCorpus(path.join(base_path, p['input']),
                      no_below=p['no_below'],
                      no_above=p['no_above'],
                      preprocess=preprocess)

    MmCorpus.serialize(path.join(output_dir, p['corpus_name']), cor, progress_cnt=10000)
    cor.dictionary.save(path.join(output_dir, p['dict_name']))
Example #6
def parse_opts(opts):
	global tcp_port,udp_port,http_port,logger
	
	#create a logger
	logger =  get_logger()
	for arg,param in opts:
		if arg == '-h':
			usage()
			sys.exit(0)
		elif arg == '-u': 
			try:
				udp_port = int(param)
			except Exception as e:
				usage()
				sys.exit(0)
		elif arg == '-p':
			try:
				tcp_port = int(param)
			except Exception as e:
				usage()
				sys.exit(0)		
		elif arg == '-t':
			try:
				http_port = int(param)
			except Exception as e:
				usage()
				sys.exit(0)	
Example #7
    def __init__(self, nick, logfile=None, verbosity='INFO'):
        self.nick = self.base_nick = nick

        self.logger = get_logger('ircconnection.logger', logfile, verbosity)

        # gevent pool
        self.gpool = Pool(10)

        self._valid_orders = []
Example #8
    def __init__(self, octobot):
        self.octobot = octobot

        # Logger
        self.logger = get_logger(self.__class__.__name__)

        self.performance_analyser = None
        self.time_frames = None
        self.relevant_evaluators = []
 def __init__(self, spiders):
     # initialize the RequestHandler's logger
     self.logger = tools.get_logger(__name__)
     # initialize the spider download middlewares
     self.middlewares = defaultdict(list)
     self.__init_first_middleware()
     self.__init_spider_middleware(spiders)
     # set the download interval
     self.sleep_time = getattr(setting, 'SLEEP_TIME', 3)
     # do not retry a request when one of these HTTP codes is returned
     self.abandon_code = {400, 401, 403, 404, 405, 406, 407, 410, 411, 413, 416, 500, 501, 502, 505}
def main(param_file=None):

    # setup
    p, base_path, output_dir = tools.setup(param_file)
    logger = tools.get_logger('gensim', path.join(output_dir, "run.log"))
    logger.info("running %s" % ' '.join(sys.argv))

    # in test case
    if param_file:
        files = [path.join(base_path, p['wiki_txt'])]
    else:
        files = glob.glob(path.join(base_path, p['wiki_txt']) + '*.txt')

    out = codecs.open(os.path.join(output_dir, 'wiki.json'), mode='w', encoding='utf-8')

    headline = re.compile('\[\[(.*)\]\]')
    level2 = re.compile('== (.*) ==')

    t0 = time.time()
    c = 0
    res = {}

    for file in files:
        print 'work on: %s' % file
        with codecs.open(file, encoding='utf-8') as f:
            for line in f:

                # ignore linebreaks
                if line == '\n':
                    continue

                # if headline found
                if headline.search(line):
                    if len(res) > 0:
                        out.write(json.dumps(res, encoding='utf-8', ensure_ascii=False) + '\n')
                    topic = headline.search(line).groups()[0]
                    res = {topic: {}}
                    sub = None

                elif level2.search(line):
                    sub = level2.search(line).groups()[0]
                else:
                    if not sub:
                        res[topic].setdefault('desc', []).append(line.strip())
                    else:
                        res[topic].setdefault(sub, []).append(line.strip())
        c += 1
        print 'average execution time: %f' % ((time.time() - t0) / c)
    out.write(json.dumps(res, encoding='utf-8', ensure_ascii=False) + '\n')

    print time.time() - t0
Example #11
def main(param_file=None):

    # setup
    p, base_path, output_dir = tools.setup(param_file)
    logger = tools.get_logger('gensim', path.join(output_dir, "run.log"))
    logger.info("running %s" % ' '.join(sys.argv))

    inp = codecs.open(os.path.join(p['base_path'],
                                   p['corpora_path'],
                                   p['corpus_name']),
                      mode='r', encoding='utf-8')
    out = codecs.open(os.path.join(output_dir,
                                   p['result_name']),
                      mode='w', encoding='utf-8')
    pair = re.compile('\d\.(\w+):(\w+)')
    exclude = set(string.punctuation)

    line_count = 0
    res = []

    for line in inp:
        
        # skip empty lines
        if line == "\n":
            continue
        
        # finished one entry
        if line_count % 5 == 0:
            print pair.search(line).groups()
            res.append({'terms': pair.search(line).groups(),
                        'sentences': [],
                        'sentences_tagged': [],
                        'values': []})

        # annotate sentence and add it to result
        if line_count % 5 == 1 or line_count % 5 == 2:
            res[-1]['sentences'].append(line.strip())
            cleaned = "".join(ch for ch in line.strip() if ch not in exclude)
            tagged = tools.tag(cleaned, p['senna_path'])
            res[-1]['sentences_tagged'].append(tagged)

        # add the ratings
        if line_count % 5 == 3 or line_count % 5 == 4:
            res[-1]['values'].append(float(line))

        line_count = line_count+1
    
    # store the output
    json.dump(res, out, indent=2)
def process_video(video_dict):
    """
    Handle a live-stream video: send the bot notification, download the video,
    upload it, and store it in the database.
    :param video_dict: dict containing the live-stream video data
    :return: None
    """
    bot(f"[直播提示] {video_dict['Provide']}{video_dict.get('Title')} 正在直播 链接: {video_dict['Target']}"
        )

    logger = get_logger('Process Video')
    logger.info(f'{video_dict["Provide"]} Found A Live, starting downloader')
    video_dict['Title'] = AdjustFileName(video_dict['Title']).adjust()
    if video_dict["Provide"] == 'Youtube':
        downloader(r"https://www.youtube.com/watch?v=" + video_dict['Ref'],
                   video_dict['Title'], proxy, '720p')
    else:
        downloader(video_dict['Ref'], video_dict['Title'], proxy)
    upload_queue.put_nowait(video_dict)
def main(param_file=None):

    # setup
    p, base_path, output_dir = tools.setup(param_file)
    result_path = path.join(base_path, p['result_path'])
    logger = tools.get_logger('gensim', path.join(output_dir, "run.log"))
    logger.info("running %s" % ' '.join(sys.argv))

    logger.info('load the articles..')
    article_path = path.join(result_path, p['article_label'])
    wiki = pickle.load(open(path.join(article_path, 'articles.pickle')))

    logger.info('load dictionary and models')
    dictionary = Dictionary.load(path.join(result_path,
                                           p['model_label'],
                                           'dic.dict'))
    model_path = path.join(result_path, p['model_label'])
    lsi = LsiModel.load(path.join(model_path, 'lsi.model'))
    pre = pickle.load(open(path.join(model_path, 'pre.model')))
    if int(p['num_topics']) > lsi.num_topics:
        logger.error('model too small')
    lsi.num_topics = int(p['num_topics'])

    data = {}
    for topic, entries in wiki.iteritems():
        logger.info('working on: %s' % topic)

        data[topic] = {}
        data[topic]['keys'] = []
        vecs = []
        data[topic]['ratings'] = []
        for key, val in entries.iteritems():
            data[topic]['keys'].append(key)
            vecs.append(lsi[pre[dictionary.doc2bow(val['text'])]])
            data[topic]['ratings'].append(val['rating'])
        data[topic]['vecs'] = np.squeeze(np.array(vecs)[:, :, 1:2]).T

        U, d, _ = np.linalg.svd(data[topic]['vecs'], full_matrices=False)
        data[topic]['U'] = U
        data[topic]['d'] = d

    f = open(os.path.join(output_dir, "data.pickle"), 'wb')
    pickle.dump(data, f)
Example #14
def start_temp_daemon():
    db = Database('Queues')
    while True:
        event = []
        for target_url in db.select():
            p = YoutubeTemp(target_url)
            event.append(p)
            p.start()
        # wait until every YoutubeTemp process has finished
        for p in event:
            p.join()
        logger = get_logger('YoutubeTemp')
        logger.info('A check has finished.')
        sleep(sec)
Example #15
def bd_upload(file):
    logger = get_logger('bd_upload')
    if enable_upload:
        if 'nt' in name:
            command = [
                f"{ABSPATH}\\BaiduPCS-Go\\BaiduPCS-Go.exe", "upload", "--nofix"
            ]
            command2 = [
                f'{ABSPATH}\\BaiduPCS-GO\\BaiduPCS-Go.exe', "share", "set"
            ]
        else:
            command = [
                f"{ABSPATH}/BaiduPCS-Go/BaiduPCS-Go", "upload", "--nofix"
            ]
            command2 = [f"{ABSPATH}/BaiduPCS-Go/BaiduPCS-Go", "share", "set"]
        command.append(f"{ddir}/{file}")
        command.append("/")
        command2.append(file)
        subprocess.run(command)
        s2 = subprocess.run(command2,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            encoding='utf-8',
                            universal_newlines=True)
        share_info = s2.stdout
        if 'https' in share_info:
            share_info = share_info.replace('\n', '')
            logger.info(f'{file}: Share successful {share_info}')
        else:
            logger.error('Share failed')
            raise RuntimeError(f'{file} share failed')
        reg = r'https://pan.baidu.com/s/([A-Za-z0-9_-]{23})'
        linkre = re.compile(reg)
        link = re.search(linkre, share_info)
        try:
            link = 'https://pan.baidu.com/s/' + link.group(1)
            return link
        except AttributeError:
            logger.exception('get share link error')
            raise RuntimeError('get share link error')
    return None
def downloader(link, title, dl_proxy, quality='best'):
    logger = get_logger('Downloader')
    # co = ["streamlink", "--hls-live-restart", "--loglevel", "trace", "--force"]
    co = ["streamlink", "--hls-live-restart", "--force"]
    if enable_proxy:
        co.append('--http-proxy')
        co.append(f'http://{dl_proxy}')
        co.append('--https-proxy')
        co.append(f'https://{dl_proxy}')
    co.append("-o")
    co.append(f"{ddir}/{title}")
    co.append(link)
    co.append(quality)
    subprocess.run(co)
    paths = f'{ddir}/{title}'
    if isfile(paths):
        logger.info(f'{title} has been downloaded.')
        bot(f"[下载提示] {title} 已下载完成,等待上传")
    else:
        logger.error(f'{title} Download error, link: {link}')
        raise RuntimeError(f'{title} Download error, link: {link}')
# -*- coding: utf-8 -*-

from sources.osm_source import OSMSource
import tools
import os
import requests
import tempfile
from osm_lint_entity import OsmLintEntity

logger = tools.get_logger(__name__)


class PBFSource(OSMSource):
    """
    Source reading from .pbf file
    """
    def __init__(self, context, process_entity_callback, map_name, pbf_url):
        super(PBFSource, self).__init__(context, map_name,
                                        process_entity_callback)
        self.pbf_url = pbf_url

    def _download_map(self):
        """
        Downloads map from internet. It is up to the caller to remove this temporary file.
        :param map_name: Name of the map to download
        :param map_uri: URI of the map to download
        :return: Temprorary filename where map is downloaded
        """
        logger.info('[%s] Downloading %s', self.map_name, self.pbf_url)
        r = requests.get(self.pbf_url, stream=True)
        if not r.ok:
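            # The listing truncates here; based on the docstring (download to a
            # temporary file and return its name), the body presumably continues
            # roughly as sketched below. This is a sketch, not the original code.
            logger.error('[%s] Download failed: HTTP %s',
                         self.map_name, r.status_code)
            return None
        fd, tmp_path = tempfile.mkstemp(suffix='.pbf')
        with os.fdopen(fd, 'wb') as tmp_file:
            for chunk in r.iter_content(chunk_size=1 << 20):
                if chunk:
                    tmp_file.write(chunk)
        return tmp_path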
Example #18
 def __init__(self, target_id):
     super().__init__(target_id)
     self.module = 'Youtube'
     self.api_key = api_key
     # quality settings
     self.logger = get_logger('Youtube')
Example #19
# -*- coding: utf-8 -*-
# @Author: lim
# @Email: [email protected]
# @Date:  2018-04-04 10:06:34
# @Last Modified by:  lim
# @Last Modified time:  2018-04-10 11:47:00
import time
from tools import get_logger
from dbs.redis_db import redis_task
from config import LOOP_INTERVAL, DOING_CLEAR

CURSOR = redis_task()
clear_redis_log = get_logger('clear_redis')


def clear_redis():
    """func for trim redis finish & failed set."""
    record = 0
    total = DOING_CLEAR * 86400
    while True:
        time.sleep(LOOP_INTERVAL)
        try:
            record += LOOP_INTERVAL
            CURSOR.trim_finish_set()
            CURSOR.trim_failed_set()
            if record >= total:
                CURSOR.handle_bad_doing()
                record = 0
        except Exception as e:
            clear_redis_log.error('000:clear redis error {}'.format(e))
from multiprocessing import Process

from bilibili import Bilibili
from config import config
from mirrativ import Mirrativ
from openrec import Openrec
from tools import check_ddir_is_exist, get_logger
from twitcasting import Twitcasting
from youtube import Youtube, start_temp_daemon


logger = get_logger()


class Event:
    def __init__(self):
        self.events_multi = []
        self.gen_process()
        logger.info(self.events_multi)

    def start(self):
        self.start_multi_task()
        if config['youtube']['enable_temp']:
            temp = Process(target=start_temp_daemon)
            temp.start()  # start() runs the daemon in a separate process; run() would block here
        for event in self.events_multi:
            event.join()

    def gen_process(self):
        if config['youtube']['enable']:
            for user_config in config['youtube']['users']:
Example #21
def main(param_file=None):

    # setup
    p, base_path, output_dir = tools.setup(param_file)
    result_path = path.join(base_path, p['result_path'])
    lee_corpus = path.join(base_path, p['lee_corpus'])
    logger = tools.get_logger('gensim', path.join(output_dir, "run.log"))
    logger.info("running %s" % ' '.join(sys.argv))

    # remember starting time for runtime evaluation
    start = datetime.now()

    # load model and corpus
    logger.info('loading word mapping')
    dictionary = Dictionary.load(path.join(result_path,
                                           p['run'], p['dict_extension']))

    model_path = path.join(result_path, p['run'], p['lsi_ext'])
    logger.info('load model from: %s' % model_path)
    lsi = LsiModel.load(model_path)
    pre = SaveLoad.load(path.join(result_path, p['run'], p['pre_model_ext']))

    logger.info('load small lee corpus and preprocess')
    with open(lee_corpus, 'r') as f:
        preproc_lee_texts = preprocessing.preprocess_documents(f.readlines())
    bow_lee_texts = [dictionary.doc2bow(text,
                                        allow_update=False,
                                        return_missing=False)
                    for text in preproc_lee_texts]

    logger.info('transforming small lee corpus (only pre model)')
    corpus_pre = pre[bow_lee_texts]

    # read the human similarity data and flatten upper triangular
    human_sim_matrix = np.loadtxt(path.join(base_path, p['human_data_file']))
    sim_m_size = np.shape(human_sim_matrix)[0]
    human_sim_vector = human_sim_matrix[np.triu_indices(sim_m_size, 1)]

    max_topics = lsi.num_topics

    logger.info("iterate from %d to %d dimensions (stepsize: %d)" %
                (p['min_dim'], max_topics, p['dim_step']))

    iter_range = range(p['min_dim'], max_topics, p['dim_step'])
    res = np.zeros(len(iter_range))
    for k, l in enumerate(iter_range):

        # do the lower dimensionality transformation
        lsi.num_topics = l
        corpus_lsi = lsi[corpus_pre]

        # compute pairwise similarity matrix of transformed corpus
        sim_matrix = np.zeros((len(corpus_lsi), len(corpus_lsi)))
        for i, par1 in enumerate(corpus_lsi):
            for j, par2 in enumerate(corpus_lsi):
                sim_matrix[i, j] = matutils.cossim(par1, par2)
        sim_vector = sim_matrix[np.triu_indices(len(corpus_lsi), 1)]

        # compute correlations
        cor = np.corrcoef(sim_vector, human_sim_vector)
        logger.info("step %d: correlation with lee data: %f" % (k, cor[0, 1]))
        res[k] = cor[0, 1]

    plt.figure()
    plt.plot(iter_range, res)
    plt.savefig(os.path.join(output_dir, 'cor_plot.' + p['plot_extension']))
    plt.close()
    np.save(path.join(output_dir, 'model_dim_res.npy'), res)

    dif = datetime.now() - start
    logger.info("finished after %d days and %d secs" % (dif.days, dif.seconds))
def main(param_file=None):

    # setup
    p, base_path, output_dir = tools.setup(param_file)
    result_path = path.join(base_path, p['result_path'])
    logger = tools.get_logger('gensim', path.join(output_dir, "run.log"))
    logger.info("running %s" % ' '.join(sys.argv))

    logger.info('loading models and dictionary')
    dictionary = Dictionary.load(path.join(result_path,
                                           p['model_label'],
                                           'dic.dict'))
    model_path = path.join(result_path, p['model_label'])
    lsi = LsiModel.load(path.join(model_path, 'lsi.model'))
    pre = pickle.load(open(path.join(model_path, 'pre.model')))
    lsi.num_topics = p['num_topics']

    logger.info('load wikipedia articles')
    article_path = path.join(result_path, p['article_label'])
    wiki = pickle.load(open(path.join(article_path, 'articles.pickle')))

    times = np.zeros((1, len(wiki)))
    count = 0
    for query_key, query in wiki.iteritems():
        logger.info("working on: %s" % query_key)
        n = len(query)
        human = [val['rating'] for val in query.itervalues()]

        t0 = time.time()
        corpus = [lsi[pre[dictionary.doc2bow(val['text'])]]
                    for val in query.itervalues()]
        sim_res = MatrixSimilarity(corpus)[corpus]
        sim_res.save(path.join(output_dir, 'sim_' + query_key))
        avg = np.mean(sim_res, axis=0)
        idx = np.argsort(avg)
        times[0, count] = time.time() - t0
        count += 1

        # compute correlation with human rating
        res = np.zeros((n, 1))
        for i in range(n):
            human_r = [human[j] for j in idx[i:]]
            res[i, 0] = np.mean(human_r)

        # plot correlation
        fig = plt.figure()
        ax = fig.add_subplot(3, 1, 1)
        ax.plot(res)

        ax = fig.add_subplot(3, 1, 2)
        ratings = [val['rating'] for val in query.itervalues()]
        ax.scatter(avg[idx], [ratings[i] for i in idx])

        # plot similarity distribution
        ax = fig.add_subplot(3, 1, 3)
        ax.bar(range(n), avg[idx])

        # Set the x tick labels to the group_labels defined above and rotate
        ax.set_xticks(range(n))
        k = [key + ' ' + str(query[key]['rating']) for key in query.keys()]
        ax.set_xticklabels([k[i] for i in idx])
        fig.autofmt_xdate()
        plt.savefig(path.join(output_dir, query_key + '.' + p['format']))
        plt.close()
    logger.info('average similarity calculation time: %f' % np.mean(times))
Example #23
 def __init__(self):
     self.PICS_DIR = "data/images"
     self.ROOT_URL = "https://www.doutula.com/photo/list/"
     self.LOGGER = tools.get_logger("doutu")
Example #24
# -*- coding: utf-8 -*-
# @Author: lim
# @Email: [email protected]
# @Date:  2018-04-02 14:31:54
# @Last Modified by:  lim
# @Last Modified time:  2018-04-09 16:11:05
import psycopg2 
from tools import get_logger, error_record
from config import PG_DB ,PG_USER, PG_PWD, PG_HOST, PG_PORT 

pd_db_log = get_logger('pgsql_db')


class PgSql(object):


    def __init__(self):
        self.conn = self.get_conn()
        self.cursor = self.get_cursor()
        #self.table_1 = self.create_table_1()
        #self.table_2 = self.create_table_2()
        #self.table_3 = self.create_table_3()


    def get_conn(self):
        try:
            return psycopg2.connect(database=PG_DB, user=PG_USER, password=PG_PWD,
                 host=PG_HOST, port=PG_PORT) 
        except Exception as e:
            error_record('200')
            pd_db_log.warning('200:Can not establish a connection to guangzhou pg DB: {}'.format(e))
Example #25
 def __init__(self):
     self.logger = get_logger('BDUpload')
Example #26
 def __init__(self):
     super().__init__()
     self.queue = upload_queue
     self.logger = get_logger('VideoUpload')
     self.video_info = None
import asyncio
import os
import time
import json

from nats.aio.client import Client

from tools import get_logger, make_fake_tick
logger = get_logger(__name__)

NATS_HOSTNAME = os.environ.get('NATS_HOSTNAME', 'localhost')
NATS_SERVERS = [f'nats://{NATS_HOSTNAME}:4222']

TICKER_SUBJECT_NAME = 'ticker'


async def main(event_loop):
    nats_client = Client()
    await nats_client.connect(NATS_SERVERS, loop=event_loop)
    logger.info(f"Connected to NATS at {nats_client.connected_url.netloc}...")
    logger.info(f'Publishing ticks to [{TICKER_SUBJECT_NAME}]')

    while True:
        tick = make_fake_tick()
        await nats_client.publish(TICKER_SUBJECT_NAME,
                                  json.dumps(tick).encode())
        await nats_client.flush(timeout=1)
        logger.info(f'Published: {tick}')
        await asyncio.sleep(3)  # sleep without blocking the event loop
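

# The listing stops before the script's entry point; assuming the
# pre-asyncio.run driver style matching the code above (an assumption,
# not shown in the source), it would be run roughly like this:
if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main(loop))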

Example #28
import logging
import time
from datetime import datetime
from tools import get_clients, get_conversion, get_need_volumes
from structs import MarketInfo, Deal, ArbOpp

from tools import get_logger, send_notifier
from tools import (get_sum_on_volume, get_base_and_coin, get_usd_price,
                   get_arb_amount)

from collections import defaultdict

log = get_logger('runner_v2.log')


def fetch_order_book(func, market):
    import random
    for _ in range(10):
        try:
            return func(market)
        except Exception as e:
            if 'too often' in str(e):
                wait = 0.1 + random.random()
                time.sleep(wait)
            else:
                break
    raise RuntimeError


def process_coins(worker_id, coins_markets, config):
    open_arbs_all_coins = defaultdict(lambda: [])
Example #29
import io
import os
import time
from datetime import datetime
from threading import Event

import numpy as np
import zmq
from PIL import Image

import tools
from picamera import PiCamera, PiCameraCircularIO, array

logger = tools.get_logger('vigilant')

RECORD_RESOLUTION = (1920, 1080)
MOTION_RESOLUTION = (640, 480)
SAVE_FOLDER = '/home/pi/mnt'

CV_THRESHOLD = .9
PRESECONDS = 3
ANALYSE_PERIOD = .5
MACROBLOCK_THRESHOLD = 60
MACROBLOCK_COUNT_FOR_MOTON = 10


class Watcher(array.PiMotionAnalysis):
    def __init__(self, camera, motion_event):
        super().__init__(camera)
        self.are_some_movement = motion_event
        self.kernel = None
 def __init__(self, target_id):
     super().__init__(target_id)
     self.logger = get_logger('Twitcasting')
     self.module = 'Twitcasting'
 def __init__(self, target_id):
     super().__init__(target_id)
     self.logger = get_logger('Mirrativ')
     self.module = 'Mirrativ'
Example #32
def main():

    args = parse_option()
    os.makedirs(args.checkpoint_path, exist_ok=True)

    if not args.debug:
        os.environ['PYTHONBREAKPOINT'] = '0'
        logger = get_logger(logpath=os.path.join(args.checkpoint_path, 'logs'),
                            filepath=os.path.abspath(__file__))

        def print_pass(*args):
            logger.info(*args)

        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    print(args)

    train_loader = get_train_loader(args)

    isd = ISD(args.arch, K=args.queue_size, m=args.momentum, T=args.temp)
    isd.data_parallel()
    isd = isd.cuda()

    print(isd)

    criterion = KLD().cuda()

    params = [p for p in isd.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=args.learning_rate,
                                momentum=args.sgd_momentum,
                                weight_decay=args.weight_decay)

    cudnn.benchmark = True
    args.start_epoch = 1

    if args.resume:
        print('==> resume from checkpoint: {}'.format(args.resume))
        ckpt = torch.load(args.resume)
        print('==> resume from epoch: {}'.format(ckpt['epoch']))
        isd.load_state_dict(ckpt['state_dict'], strict=True)
        optimizer.load_state_dict(ckpt['optimizer'])
        args.start_epoch = ckpt['epoch'] + 1

    # routine
    for epoch in range(args.start_epoch, args.epochs + 1):

        adjust_learning_rate(epoch, args, optimizer)
        print("==> training...")

        time1 = time.time()
        loss = train_student(epoch, train_loader, isd, criterion, optimizer,
                             args)

        time2 = time.time()
        print('epoch {}, total time {:.2f}'.format(epoch, time2 - time1))

        # saving the model
        if epoch % args.save_freq == 0:
            print('==> Saving...')
            state = {
                'opt': args,
                'state_dict': isd.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch,
            }

            save_file = os.path.join(
                args.checkpoint_path,
                'ckpt_epoch_{epoch}.pth'.format(epoch=epoch))
            torch.save(state, save_file)

            # help release GPU memory
            del state
            torch.cuda.empty_cache()
Example #33
#coding=utf-8
import redis
import traceback
from tools import get_current_day, get_logger, error_record
from config import *

redis_db_log = get_logger('redis_db')
clear_redis_log = get_logger('clear_redis')


class redis_task(object):
    def __init__(self):
        self.R = redis.Redis(host=REDIS_HOST,
                             port=REDIS_PORT,
                             db=0,
                             password=REDIS_PWD)
        self.test = self.connection_test()

    def connection_test(self):
        try:
            self.R.set('test', 'test')
        except:
            error_record('101')
            redis_db_log.warning(
                '101:Can not establish a connection to local redis DB')

    def save_task_to_redis(self, task):
        """save task to todo list"""
        if self.R.dbsize() >= REDIS_AMOUNT:
            return 'full'
        try:
def main(param_file=None):

    # setup
    p, base_path, output_dir = tools.setup(param_file)
    logger = tools.get_logger('gensim', os.path.join(output_dir, "run.log"))
    logger.info("running %s" % ' '.join(sys.argv))

    # initializations
    articles = {}
    all_missing = []
    redir_on = {}
    collisions = {}
    non_ascii = []
    site = mwclient.Site('en.wikipedia.org', '/w/api.php/')

    # get all txt files in a folder and iterate over them
    filelist = glob.glob(os.path.join(base_path,
                                      p['folder_path'],
                                      "*.txt"))
    for f in filelist:

        # get the word we are working on
        f_name = os.path.basename(f)
        k_word = os.path.splitext(f_name)[0]
        logger.info("working on file: %s" % f_name)

        # try to convert the word into ascii for the http query
        file_obj = codecs.open(f, "r", "utf-16")
        counter = 0
        words = []
        for w in file_obj.readlines():
            try:
                s = w.strip().decode('ascii')
                words.append(s)
            except Exception:
                counter += 1
                non_ascii.append(w.strip())
        logger.info("\t%d words containing non ascii are ommited" % counter)

        articles[k_word] = {}
        logger.info("\tfound %d words in file" % len(words))

        for word in words:
            data = {}
            page = site.Pages[word]

            # follow the redirect and check for collisions
            if page.redirect:
                res = re.search('\[\[(.+)\]\]', page.edit())
                redir_word = urllib.unquote(res.groups()[0])
                if redir_word in redir_on:
                    logger.warning("[%s AND %s] both redirect on --> %s" %
                                    (word, redir_on[redir_word], redir_word))
                    collisions[redir_word] = redir_on[redir_word]
                else:
                    logger.info("[%s] redir from [%s]" % (redir_word, word))
                    redir_on[redir_word] = word
                text = site.Pages[redir_word].edit()
                data['redirected'] = redir_word

            else:
                text = page.edit()

            # check for missing wikipedia articles
            if  text == "":
                all_missing.append(word)
                continue

            # preprocess the received article
            data['text'] = wikicorpus.filter_wiki(text)
            in_ascii = ud.normalize('NFKD',
                                    data['text']).encode('ascii', 'ignore')
            data['text'] = preprocess_string(in_ascii)
            articles[k_word][word] = data

    logger.info('add human rating to the articles')
    id_word = {}
    sparql_path = os.path.join(base_path, p['sparql_path'])
    with open(os.path.join(sparql_path, 'id_word.txt')) as f:
        for line in f.readlines():
            idx, word = line.strip().split('\t')
            id_word[idx] = word

    #add human rating to the wikipedia data
    not_found = []
    with open(os.path.join(sparql_path, p['human_file'])) as f:
        for line in f.readlines():
            arr = line.split()
            word = id_word[arr[0]]
            term = arr[3]
            try:
                articles[word][term]['rating'] = int(arr[4])
            except KeyError:
                not_found.append(term)
    logger.info("%d words from the ref queries not found" % len(not_found))

    f = open(os.path.join(output_dir, "articles.pickle"), 'wb')
    pickle.dump(articles, f)
    f.close()

    info = {}
    info['missing'] = all_missing
    info['redirs'] = redir_on
    info['collisions'] = collisions
    info['not_found'] = not_found
    info['non_ascii'] = non_ascii
    f = open(os.path.join(output_dir, "info.pickle"), 'wb')
    pickle.dump(info, f)
    f.close()

    logger.info("%d redirecting collisions (see info.pkl)" % len(collisions))
# -*- coding: utf-8 -*-

import Image
import tools
import os

logger = tools.get_logger(__name__)

class HandlingImage(object):
    
    def __init__(self,path,folder_out_title,last_index):
        self.path = path
        self.folder_out_title = folder_out_title
        self.last_index = last_index
        
    def execute(self,index):
        size_thumb = (160,120)
        size_light = (1024,768)
        image = Image.open(self.path)
        filename= str(self.last_index + index).zfill(5)
        image.save(os.path.join(self.folder_out_title,filename + ".jpg"),"JPEG")
        image_light = image.copy()
        image_light.thumbnail(size_light,Image.ANTIALIAS)
        image_light.save(os.path.join(self.folder_out_title, filename+ "_light.jpg"),
                   "JPEG")
        image_thumb = image.copy()
        image_thumb.thumbnail(size_thumb,Image.ANTIALIAS)
        image_thumb.save(os.path.join(self.folder_out_title, filename + "_thumb.jpg"),
                   "JPEG")
        logger.info(u"Média %s=>%s terminé avec succès" % (self.path,filename))
from tools import get_redis, get_logger
import defaultsettings
# build-in
import sys
try:
    import cPickle as pickle
except ImportError:
    import pickle
import time
# need
import gevent

reload(sys)
sys.setdefaultencoding('utf-8')

logger = get_logger(__name__)
redis_conn = get_redis()


class EmptyError(Exception):
    pass


def log_push(func):
    def _log(request, spider):
        func(request, spider)
        logger.debug('Push request<%s> into queue', request)
    return _log
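

# Hypothetical usage of the log_push decorator above: the decorated function,
# the spider.name attribute, and the pickled payload are illustrative
# assumptions, not part of the original module.
@log_push
def push_request(request, spider):
    redis_conn.lpush(spider.name, pickle.dumps(request))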


class RequestQueue(object):
Example #37
 def __init__(self):
     self.logger = get_logger('S3Upload')
     self.minio = Minio(s3_server,
                        access_key=s3_access_key,
                        secret_key=s3_secret_key,
                        secure=True)
Example #38
def main(param_file=None):

    # setup
    p, base_path, output_dir = tools.setup(param_file)
    logger = tools.get_logger('gensim', path.join(output_dir, "run.log"))
    logger.info("running %s" % ' '.join(sys.argv))
Example #39
# pylint: disable=E1101, E1103, W0632
import collections
import itertools
import numpy as np
import operator
import pandas as pd
from numpy import linalg
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier

import tools
import ranking_constants

LOGGER = tools.get_logger(__name__)


@tools.timeit
def load_data(kpi):
    """
    Function to load data from the metrics csv files and hammer file

    :param kpi: name of the kpi to analyze
    :return: dataframe containing data
    """
    LOGGER.info("Loading hammer data")
    # the first three lines do not contain meaningful data: they are dropped
    statistics = pd.read_csv(
        ranking_constants.CSV_FILES["hammer_statistics"]
    ).iloc[3:, :].set_index("timestamp")
Example #40
 def __init__(self, vinfo):
     super().__init__(None)
     self.vinfo = vinfo
     self.vid = None
     self.db = Database('Queues')
     self.logger = get_logger('YoutubeTemp')
 def __init__(self):
     self.API = BilibiliAPI()
     self.logger = get_logger('Bilibili')
     self.old_video_num = None
Example #42
def main(param_file=None):

    # setup
    p, base_path, output_dir = tools.setup(param_file)
    working_corpus = path.join(base_path, p['corpus_path'], p['corpus_name'])
    human_data_file = path.join(base_path, p['human_data_file'])
    lee_corpus = path.join(base_path, p['lee_corpus'])
    logger = tools.get_logger('gensim', path.join(output_dir, "run.log"))
    logger.info("running %s" % ' '.join(sys.argv))

    # remember starting time for runtime evaluation
    start = datetime.now()

    logger.info('loading word mapping')
    dictionary = Dictionary.load(path.join(base_path,
                                           p['corpus_path'],
                                           p['dict_name']))
    Dictionary.save(dictionary, path.join(output_dir, p['dict_name']))
    logger.info(dictionary)

    logger.info('loading corpus')
    corpus_bow = MmCorpus(working_corpus)

    logger.info("create preprocessing model and save it to disk")
    if p['pre_model'] == 'tfidf':
        pre_model = TfidfModel(corpus_bow, id2word=dictionary, normalize=True)
    elif p['pre_model'] == 'log_ent':
        pre_model = LogEntropyModel(corpus_bow,
                                    id2word=dictionary, normalize=True)
    else:
        raise ValueError('model parameter %s not known' % p['pre_model'])
    pre_model.save(os.path.join(output_dir, p['pre_model_extension']))

    logger.info('initialize LSI model')
    lsi = models.LsiModel(pre_model[corpus_bow],
                          id2word=dictionary, num_topics=p['num_topics'])
    lsi.save(os.path.join(output_dir, p['lsi_extension']))
    logger.info('finished --> lsi model saved to: %s' %
                os.path.join(output_dir, p['lsi_extension']))

    # check for correlation with lee human data
    logger.info('load small lee corpus and preprocess')
    with open(lee_corpus, 'r') as f:
        preproc_lee_texts = preprocessing.preprocess_documents(f.readlines())
    bow_lee_texts = [dictionary.doc2bow(text,
                                        allow_update=False,
                                        return_missing=False)
                    for text in preproc_lee_texts]

    logger.info('transforming small lee corpus (LSI)')
    corpus_lsi = lsi[pre_model[bow_lee_texts]]

    # # compute pairwise similarity matrix of transformed corpus
    sim_matrix = np.zeros((len(corpus_lsi), len(corpus_lsi)))
    for i, par1 in enumerate(corpus_lsi):
        for j, par2 in enumerate(corpus_lsi):
            sim_matrix[i, j] = matutils.cossim(par1, par2)
    sim_vector = sim_matrix[np.triu_indices(len(corpus_lsi), 1)]

    # read the human similarity data and flatten upper triangular
    human_sim_matrix = np.loadtxt(human_data_file)
    sim_m_size = np.shape(human_sim_matrix)[0]
    human_sim_vector = human_sim_matrix[np.triu_indices(sim_m_size, 1)]

    # compute correlations
    cor = np.corrcoef(sim_vector, human_sim_vector)
    logger.info("correlation with lee human data: %f" % cor[0, 1])

    dif = datetime.now() - start
    logger.info("finished after %d days and %d secs" % (dif.days, dif.seconds))
Example #43
from settings import IDC_TAG
from settings import SALT_CHECK
from consul import consul
from tools import get_logger, switch

import getinfo
import sys
import os
import requests
import re
import fire
import simplejson

scmd = {'sh': '/bin/sh', 'py': '/usr/local/bin/python'}

logger = get_logger('Jenkins publish', '/www/logs/', True)

_, upstreams = getinfo.main()


def getHostname(ip):
    res = requests.get(CMDB + ip)
    hosts = res.json()
    if not hosts:
        logger.error('%s: cant find the host.' % ip)
        sys.exit(4)
    hostnames = []
    for h in hosts:
        hname = h.get('hostname')
        email = h.get('email')
        if IDC_TAG not in hname:
def main(param_file=None):

    # setup
    p, base_path, output_dir = tools.setup(param_file)
    model_path = path.join(base_path,
                           p['result_path'],
                           p['model_label'])
    logger = tools.get_logger('gensim', path.join(output_dir, "run.log"))
    logger.info("running %s" % ' '.join(sys.argv))

    # train the model on the small marketing corpus
    preprocess = []

    if 'stoplist' in p.as_dict():
        stoplist = open(path.join(base_path, p['stoplist'])).readlines()
        stoplist = [unicode(s.strip(), encoding='utf-8').lower() for s in stoplist]
        def remove_stopwords(sentence):
            return [word for word in sentence if not word in stoplist]
        preprocess.append(remove_stopwords)

    if 'stemmer' in p.as_dict():
        stemmer = Stemmer.Stemmer(p['stemmer'])
        preprocess.append(stemmer.stemWords)

    if not p['model_label']:
        cor = TextFilesCorpus(path.join(base_path, p['corpus_path']),
                              no_below=p['no_below'],
                              no_above=p['no_above'],
                              preprocess=preprocess)
        dictionary = cor.dictionary

        pre = LogEntropyModel(cor, id2word=dictionary, normalize=True)
        lsi = LsiModel(pre[cor], id2word=dictionary, num_topics=p['num_topics'])
    else:
        dictionary = Dictionary.load(path.join(model_path, p['dict_name']))
        pre = SaveLoad.load(path.join(model_path, 'pre.model'))
        lsi = LsiModel.load(path.join(model_path, 'lsi.model'))
        lsi.num_topics = p['num_topics']

    test_cor_path = path.join(base_path, p['test_cor_path'])
    test_answers, gold_answers, ratings = [], [], []


    flist = glob.glob(path.join(test_cor_path, 'corpus_3', '*.txt'))
    for file in flist:
        match = re.search('data3_(\d)_\d+.txt', file)
        ratings.append(int(match.group(1)))
        with open(file) as f:
            doc = string.join(map(string.strip, f.readlines()))
            doc = utils.tokenize(doc, lower=True)
            for func in preprocess:
                doc = func(doc)
            corpus = lsi[pre[dictionary.doc2bow(doc)]]
            test_answers.append(corpus)
    flist = glob.glob(path.join(test_cor_path, 'corpus_3_golden', '*.txt'))
    for file in flist:
        with open(file) as f:
            doc = string.join(map(string.strip, f.readlines()))
            doc = utils.tokenize(doc, lower=True)
            for func in preprocess:
                doc = func(doc)
            corpus = lsi[pre[dictionary.doc2bow(doc)]]
            gold_answers.append(corpus)


    sim = MatrixSimilarity(test_answers)[gold_answers]
    mean_sim = np.mean(sim, axis=0)
    print 'pearsons corrcoef: %f' % np.corrcoef(ratings, mean_sim)[0,1]
    print 'spearmans r: %f with p: %f' % stats.spearmanr(ratings, mean_sim)

# define what should happen when a point is picked
def onpick(event):
    plt.subplot(2, 1, 1)
    event.artist.figure.axes[0].texts = []
    plt.annotate(event.artist.name, (event.artist._x, event.artist._y))


# setup
p = build_parameters(sys.argv[1])
result_path = path.join(p['base_path'], p['result_path'])
output_dir = path.join(result_path, p['sumatra_label'])
if not path.exists(output_dir):
    os.mkdir(output_dir)
logger = tools.get_logger('gensim', path.join(output_dir, "run.log"))
logger.info("running %s" % ' '.join(sys.argv))

data = pickle.load(open(path.join(result_path,
                                  p['data_label'], 'data.pickle')))

for key, val in data.iteritems():
# for bla in [1]:
#     key, val = 'eagle', data['eagle']
    

    fig = plt.figure()
    fig.canvas.mpl_connect('pick_event', onpick)
    plt.subplot(3, 1, 1)
    plt.title(key)
 def __init__(self, target_id):
     super().__init__(target_id)
     self.logger = get_logger('Openrec')
     self.module = 'Openrec'